diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a5c8b59c6725f43b68dfdefd11d85ec5be901fc2 --- /dev/null +++ b/README.md @@ -0,0 +1,80 @@ +--- +license: apache-2.0 +base_model: openai/whisper-large-v3 +tags: +- generated_from_trainer +datasets: +- mozilla-foundation/common_voice_16_0 +metrics: +- wer +model-index: +- name: whisper-large-v3-pt-cv16-cuda + results: + - task: + name: Automatic Speech Recognition + type: automatic-speech-recognition + dataset: + name: mozilla-foundation/common_voice_16_0 pt + type: mozilla-foundation/common_voice_16_0 + split: None + args: pt + metrics: + - name: Wer + type: wer + value: 0.9998545572074984 +--- + + + +# whisper-large-v3-pt-cv16-cuda + +This model is a fine-tuned version of [openai/whisper-large-v3](https://huggingface.co/openai/whisper-large-v3) on the mozilla-foundation/common_voice_16_0 pt dataset. +It achieves the following results on the evaluation set: +- Loss: 0.1325 +- Wer: 0.9999 + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 1e-06 +- train_batch_size: 8 +- eval_batch_size: 8 +- seed: 42 +- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08 +- lr_scheduler_type: linear +- lr_scheduler_warmup_steps: 2000 +- training_steps: 5000 +- mixed_precision_training: Native AMP + +### Training results + +| Training Loss | Epoch | Step | Validation Loss | Wer | +|:-------------:|:-----:|:----:|:---------------:|:------:| +| 0.199 | 0.26 | 1000 | 0.1563 | 0.1124 | +| 0.1654 | 0.52 | 2000 | 0.1500 | 0.1052 | +| 0.1794 | 0.77 | 3000 | 0.1379 | 0.0997 | +| 0.0821 | 1.03 | 4000 | 0.1321 | 1.0007 | +| 0.1292 | 1.29 | 5000 | 0.1325 | 0.9999 | + + +### Framework versions + +- Transformers 4.37.0.dev0 +- Pytorch 2.2.0.dev20231212 +- Datasets 2.15.1.dev0 +- Tokenizers 0.15.0 diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..42ef0f53a894a4194b11a1231b28963705a03296 --- /dev/null +++ b/all_results.json @@ -0,0 +1,14 @@ +{ + "epoch": 1.29, + "eval_loss": 0.132488414645195, + "eval_runtime": 171706.1904, + "eval_samples": 9399, + "eval_samples_per_second": 0.055, + "eval_steps_per_second": 0.007, + "eval_wer": 0.9998545572074984, + "train_loss": 0.05041759390830994, + "train_runtime": 1177508.0135, + "train_samples": 30998, + "train_samples_per_second": 0.034, + "train_steps_per_second": 0.004 +} \ No newline at end of file diff --git a/checkpoint-1000/config.json b/checkpoint-1000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..caa8aea5ccb9a4c479137c1f0ded8dc3bcc33fb2 --- /dev/null +++ b/checkpoint-1000/config.json @@ -0,0 +1,51 @@ +{ + "_name_or_path": "openai/whisper-large-v3", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": true, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 1280, + "decoder_attention_heads": 20, + "decoder_ffn_dim": 5120, + "decoder_layerdrop": 0.0, + "decoder_layers": 32, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 20, + "encoder_ffn_dim": 5120, + "encoder_layerdrop": 0.0, + "encoder_layers": 32, + "eos_token_id": 50257, + "forced_decoder_ids": null, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 32, + "num_mel_bins": 128, + "pad_token_id": 50256, + "scale_embedding": false, + "torch_dtype": "float32", + "transformers_version": "4.37.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51866 +} diff --git a/checkpoint-1000/generation_config.json b/checkpoint-1000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9c184c23d34e10a8eb965f82773d9f8e3fb90c3d --- /dev/null +++ b/checkpoint-1000/generation_config.json @@ -0,0 +1,264 @@ +{ + "alignment_heads": [ + [ + 7, + 0 + ], + [ + 10, + 17 + ], + [ + 12, + 18 + ], + [ + 13, + 12 + ], + [ + 16, + 1 + ], + [ + 17, + 14 + ], + [ + 19, + 11 + ], + [ + 21, + 4 + ], + [ + 24, + 1 + ], + [ + 25, + 6 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50360 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|yue|>": 50358, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 1, + "max_length": 448, + "no_timestamps_token_id": 50364, + "pad_token_id": 50257, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50359, + 50360, + 50361, + 50362, + 50363 + ], + "task_to_id": { + "transcribe": 50360, + "translate": 50359 + }, + "transformers_version": "4.37.0.dev0" +} diff --git a/checkpoint-1000/model-00001-of-00002.safetensors b/checkpoint-1000/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6f1025125506fdcc58ff101ababbac69b371201c --- /dev/null +++ b/checkpoint-1000/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14da2025731d949006200c34abfb6aa1be4cc2350ecc27dc6af559603a86225d +size 4993448880 diff --git a/checkpoint-1000/model-00002-of-00002.safetensors b/checkpoint-1000/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..10a0d133fd78ff736134d4d4cf30dca68ec72d31 --- /dev/null +++ b/checkpoint-1000/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48b38b2d250388274dbab6a134b7ff4184cb3525599666efa7cdefed5c99d82c +size 1180663192 diff --git a/checkpoint-1000/model.safetensors.index.json b/checkpoint-1000/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..c40d652c9b765dc6b1f8a90b16063b93b7b5c888 --- /dev/null +++ b/checkpoint-1000/model.safetensors.index.json @@ -0,0 +1,1266 @@ +{ + "metadata": { + "total_size": 6173962240 + }, + "weight_map": { + "model.decoder.embed_positions.weight": "model-00001-of-00002.safetensors", + "model.decoder.embed_tokens.weight": "model-00001-of-00002.safetensors", + "model.decoder.layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.conv1.bias": "model-00001-of-00002.safetensors", + "model.encoder.conv1.weight": "model-00001-of-00002.safetensors", + "model.encoder.conv2.bias": "model-00001-of-00002.safetensors", + "model.encoder.conv2.weight": "model-00001-of-00002.safetensors", + "model.encoder.embed_positions.weight": "model-00001-of-00002.safetensors", + "model.encoder.layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors" + } +} diff --git a/checkpoint-1000/optimizer.pt b/checkpoint-1000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..9997607899730f99dd1ef7ec9b950fc3c19cdd63 --- /dev/null +++ b/checkpoint-1000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8b7a6a42c8f342e4252296a6744e5aeb820e8d8de08a16c9dd10adc396c1293 +size 12333660476 diff --git a/checkpoint-1000/preprocessor_config.json b/checkpoint-1000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..931c77a740890c46365c7ae0c9d350ba3cca908f --- /dev/null +++ b/checkpoint-1000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 128, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-1000/rng_state.pth b/checkpoint-1000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8940433795a5e23f0ce4ca87e4337e0b68d79357 Binary files /dev/null and b/checkpoint-1000/rng_state.pth differ diff --git a/checkpoint-1000/scheduler.pt b/checkpoint-1000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f8d557372e926d2c1d85614b29b7cfb8ea164a52 Binary files /dev/null and b/checkpoint-1000/scheduler.pt differ diff --git a/checkpoint-1000/trainer_state.json b/checkpoint-1000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..689fe6fb372c552c3a3fd474137472ba215ab427 --- /dev/null +++ b/checkpoint-1000/trainer_state.json @@ -0,0 +1,270 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.25806451612903225, + "eval_steps": 1000, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 1.1499999999999999e-08, + "loss": 0.7822, + "step": 25 + }, + { + "epoch": 0.01, + "learning_rate": 2.4e-08, + "loss": 1.3149, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 3.6499999999999996e-08, + "loss": 0.7809, + "step": 75 + }, + { + "epoch": 0.03, + "learning_rate": 4.9e-08, + "loss": 1.2916, + "step": 100 + }, + { + "epoch": 0.03, + "learning_rate": 6.15e-08, + "loss": 0.7645, + "step": 125 + }, + { + "epoch": 0.04, + "learning_rate": 7.399999999999999e-08, + "loss": 1.3379, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 8.649999999999999e-08, + "loss": 0.7722, + "step": 175 + }, + { + "epoch": 0.05, + "learning_rate": 9.9e-08, + "loss": 1.281, + "step": 200 + }, + { + "epoch": 0.06, + "learning_rate": 1.115e-07, + "loss": 0.6998, + "step": 225 + }, + { + "epoch": 0.06, + "learning_rate": 1.24e-07, + "loss": 1.1532, + "step": 250 + }, + { + "epoch": 0.07, + "learning_rate": 1.365e-07, + "loss": 0.583, + "step": 275 + }, + { + "epoch": 0.08, + "learning_rate": 1.49e-07, + "loss": 0.6693, + "step": 300 + }, + { + "epoch": 0.08, + "learning_rate": 1.615e-07, + "loss": 0.3395, + "step": 325 + }, + { + "epoch": 0.09, + "learning_rate": 1.7399999999999997e-07, + "loss": 0.3597, + "step": 350 + }, + { + "epoch": 0.1, + "learning_rate": 1.8649999999999998e-07, + "loss": 0.22, + "step": 375 + }, + { + "epoch": 0.1, + "learning_rate": 1.99e-07, + "loss": 0.3449, + "step": 400 + }, + { + "epoch": 0.11, + "learning_rate": 2.1149999999999998e-07, + "loss": 0.2123, + "step": 425 + }, + { + "epoch": 0.12, + "learning_rate": 2.24e-07, + "loss": 0.3362, + "step": 450 + }, + { + "epoch": 0.12, + "learning_rate": 2.3649999999999998e-07, + "loss": 0.175, + "step": 475 + }, + { + "epoch": 0.13, + "learning_rate": 2.4899999999999997e-07, + "loss": 0.3051, + "step": 500 + }, + { + "epoch": 0.14, + "learning_rate": 2.615e-07, + "loss": 0.176, + "step": 525 + }, + { + "epoch": 0.14, + "learning_rate": 2.74e-07, + "loss": 0.2517, + "step": 550 + }, + { + "epoch": 0.15, + "learning_rate": 2.865e-07, + "loss": 0.1863, + "step": 575 + }, + { + "epoch": 0.15, + "learning_rate": 2.9899999999999996e-07, + "loss": 0.2614, + "step": 600 + }, + { + "epoch": 0.16, + "learning_rate": 3.115e-07, + "loss": 0.1942, + "step": 625 + }, + { + "epoch": 0.17, + "learning_rate": 3.24e-07, + "loss": 0.2901, + "step": 650 + }, + { + "epoch": 0.17, + "learning_rate": 3.3650000000000003e-07, + "loss": 0.1587, + "step": 675 + }, + { + "epoch": 0.18, + "learning_rate": 3.4899999999999996e-07, + "loss": 0.2128, + "step": 700 + }, + { + "epoch": 0.19, + "learning_rate": 3.6149999999999995e-07, + "loss": 0.1647, + "step": 725 + }, + { + "epoch": 0.19, + "learning_rate": 3.74e-07, + "loss": 0.2473, + "step": 750 + }, + { + "epoch": 0.2, + "learning_rate": 3.8649999999999997e-07, + "loss": 0.1415, + "step": 775 + }, + { + "epoch": 0.21, + "learning_rate": 3.99e-07, + "loss": 0.2367, + "step": 800 + }, + { + "epoch": 0.21, + "learning_rate": 4.1149999999999995e-07, + "loss": 0.1498, + "step": 825 + }, + { + "epoch": 0.22, + "learning_rate": 4.24e-07, + "loss": 0.2161, + "step": 850 + }, + { + "epoch": 0.23, + "learning_rate": 4.3649999999999997e-07, + "loss": 0.144, + "step": 875 + }, + { + "epoch": 0.23, + "learning_rate": 4.49e-07, + "loss": 0.2316, + "step": 900 + }, + { + "epoch": 0.24, + "learning_rate": 4.615e-07, + "loss": 0.1449, + "step": 925 + }, + { + "epoch": 0.25, + "learning_rate": 4.7399999999999993e-07, + "loss": 0.1513, + "step": 950 + }, + { + "epoch": 0.25, + "learning_rate": 4.864999999999999e-07, + "loss": 0.1855, + "step": 975 + }, + { + "epoch": 0.26, + "learning_rate": 4.99e-07, + "loss": 0.199, + "step": 1000 + }, + { + "epoch": 0.26, + "eval_loss": 0.15628743171691895, + "eval_runtime": 184706.1006, + "eval_samples_per_second": 0.051, + "eval_steps_per_second": 0.006, + "eval_wer": 0.1124272786037492, + "step": 1000 + } + ], + "logging_steps": 25, + "max_steps": 5000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 1000, + "total_flos": 2.717998645248e+19, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1000/training_args.bin b/checkpoint-1000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f9b2ac6d109ba135c37e8de1802d3dfa05ab0bb5 Binary files /dev/null and b/checkpoint-1000/training_args.bin differ diff --git a/checkpoint-2000/config.json b/checkpoint-2000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..caa8aea5ccb9a4c479137c1f0ded8dc3bcc33fb2 --- /dev/null +++ b/checkpoint-2000/config.json @@ -0,0 +1,51 @@ +{ + "_name_or_path": "openai/whisper-large-v3", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": true, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 1280, + "decoder_attention_heads": 20, + "decoder_ffn_dim": 5120, + "decoder_layerdrop": 0.0, + "decoder_layers": 32, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 20, + "encoder_ffn_dim": 5120, + "encoder_layerdrop": 0.0, + "encoder_layers": 32, + "eos_token_id": 50257, + "forced_decoder_ids": null, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 32, + "num_mel_bins": 128, + "pad_token_id": 50256, + "scale_embedding": false, + "torch_dtype": "float32", + "transformers_version": "4.37.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51866 +} diff --git a/checkpoint-2000/generation_config.json b/checkpoint-2000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9c184c23d34e10a8eb965f82773d9f8e3fb90c3d --- /dev/null +++ b/checkpoint-2000/generation_config.json @@ -0,0 +1,264 @@ +{ + "alignment_heads": [ + [ + 7, + 0 + ], + [ + 10, + 17 + ], + [ + 12, + 18 + ], + [ + 13, + 12 + ], + [ + 16, + 1 + ], + [ + 17, + 14 + ], + [ + 19, + 11 + ], + [ + 21, + 4 + ], + [ + 24, + 1 + ], + [ + 25, + 6 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50360 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|yue|>": 50358, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 1, + "max_length": 448, + "no_timestamps_token_id": 50364, + "pad_token_id": 50257, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50359, + 50360, + 50361, + 50362, + 50363 + ], + "task_to_id": { + "transcribe": 50360, + "translate": 50359 + }, + "transformers_version": "4.37.0.dev0" +} diff --git a/checkpoint-2000/model-00001-of-00002.safetensors b/checkpoint-2000/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..17fb190db90ea05d2df7e15bab0cf3faf754d851 --- /dev/null +++ b/checkpoint-2000/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:156e0fd71ba8d6d1a308460a813997db7d653c1f820bd2d5995abd49692f14a1 +size 4993448880 diff --git a/checkpoint-2000/model-00002-of-00002.safetensors b/checkpoint-2000/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fd685f435cd6bee482fee41a667379273cc0b05a --- /dev/null +++ b/checkpoint-2000/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c05a1c27b9a9a440db979127057f5f66ec41e211bf68bb2ea3a227c4403c8e60 +size 1180663192 diff --git a/checkpoint-2000/model.safetensors.index.json b/checkpoint-2000/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..c40d652c9b765dc6b1f8a90b16063b93b7b5c888 --- /dev/null +++ b/checkpoint-2000/model.safetensors.index.json @@ -0,0 +1,1266 @@ +{ + "metadata": { + "total_size": 6173962240 + }, + "weight_map": { + "model.decoder.embed_positions.weight": "model-00001-of-00002.safetensors", + "model.decoder.embed_tokens.weight": "model-00001-of-00002.safetensors", + "model.decoder.layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.conv1.bias": "model-00001-of-00002.safetensors", + "model.encoder.conv1.weight": "model-00001-of-00002.safetensors", + "model.encoder.conv2.bias": "model-00001-of-00002.safetensors", + "model.encoder.conv2.weight": "model-00001-of-00002.safetensors", + "model.encoder.embed_positions.weight": "model-00001-of-00002.safetensors", + "model.encoder.layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors" + } +} diff --git a/checkpoint-2000/optimizer.pt b/checkpoint-2000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e12119b1b344c128dceffdcbca9a9193f17313cc --- /dev/null +++ b/checkpoint-2000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4da10ec2e0bf47d4b44db41bb1bfc93e007a4873e0e2ff5dbc5e667cb0168b4f +size 12333660476 diff --git a/checkpoint-2000/preprocessor_config.json b/checkpoint-2000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..931c77a740890c46365c7ae0c9d350ba3cca908f --- /dev/null +++ b/checkpoint-2000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 128, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-2000/rng_state.pth b/checkpoint-2000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..5266f1a0df1c07e66dd192a65c9b980c129eb5b6 Binary files /dev/null and b/checkpoint-2000/rng_state.pth differ diff --git a/checkpoint-2000/scheduler.pt b/checkpoint-2000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..127cc9accc96cfc23129e9ada9fc59603f1a4317 Binary files /dev/null and b/checkpoint-2000/scheduler.pt differ diff --git a/checkpoint-2000/trainer_state.json b/checkpoint-2000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e63d36c8f0ba38dac710e86fade92efe8d890bb9 --- /dev/null +++ b/checkpoint-2000/trainer_state.json @@ -0,0 +1,519 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.5161290322580645, + "eval_steps": 1000, + "global_step": 2000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 1.1499999999999999e-08, + "loss": 0.7822, + "step": 25 + }, + { + "epoch": 0.01, + "learning_rate": 2.4e-08, + "loss": 1.3149, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 3.6499999999999996e-08, + "loss": 0.7809, + "step": 75 + }, + { + "epoch": 0.03, + "learning_rate": 4.9e-08, + "loss": 1.2916, + "step": 100 + }, + { + "epoch": 0.03, + "learning_rate": 6.15e-08, + "loss": 0.7645, + "step": 125 + }, + { + "epoch": 0.04, + "learning_rate": 7.399999999999999e-08, + "loss": 1.3379, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 8.649999999999999e-08, + "loss": 0.7722, + "step": 175 + }, + { + "epoch": 0.05, + "learning_rate": 9.9e-08, + "loss": 1.281, + "step": 200 + }, + { + "epoch": 0.06, + "learning_rate": 1.115e-07, + "loss": 0.6998, + "step": 225 + }, + { + "epoch": 0.06, + "learning_rate": 1.24e-07, + "loss": 1.1532, + "step": 250 + }, + { + "epoch": 0.07, + "learning_rate": 1.365e-07, + "loss": 0.583, + "step": 275 + }, + { + "epoch": 0.08, + "learning_rate": 1.49e-07, + "loss": 0.6693, + "step": 300 + }, + { + "epoch": 0.08, + "learning_rate": 1.615e-07, + "loss": 0.3395, + "step": 325 + }, + { + "epoch": 0.09, + "learning_rate": 1.7399999999999997e-07, + "loss": 0.3597, + "step": 350 + }, + { + "epoch": 0.1, + "learning_rate": 1.8649999999999998e-07, + "loss": 0.22, + "step": 375 + }, + { + "epoch": 0.1, + "learning_rate": 1.99e-07, + "loss": 0.3449, + "step": 400 + }, + { + "epoch": 0.11, + "learning_rate": 2.1149999999999998e-07, + "loss": 0.2123, + "step": 425 + }, + { + "epoch": 0.12, + "learning_rate": 2.24e-07, + "loss": 0.3362, + "step": 450 + }, + { + "epoch": 0.12, + "learning_rate": 2.3649999999999998e-07, + "loss": 0.175, + "step": 475 + }, + { + "epoch": 0.13, + "learning_rate": 2.4899999999999997e-07, + "loss": 0.3051, + "step": 500 + }, + { + "epoch": 0.14, + "learning_rate": 2.615e-07, + "loss": 0.176, + "step": 525 + }, + { + "epoch": 0.14, + "learning_rate": 2.74e-07, + "loss": 0.2517, + "step": 550 + }, + { + "epoch": 0.15, + "learning_rate": 2.865e-07, + "loss": 0.1863, + "step": 575 + }, + { + "epoch": 0.15, + "learning_rate": 2.9899999999999996e-07, + "loss": 0.2614, + "step": 600 + }, + { + "epoch": 0.16, + "learning_rate": 3.115e-07, + "loss": 0.1942, + "step": 625 + }, + { + "epoch": 0.17, + "learning_rate": 3.24e-07, + "loss": 0.2901, + "step": 650 + }, + { + "epoch": 0.17, + "learning_rate": 3.3650000000000003e-07, + "loss": 0.1587, + "step": 675 + }, + { + "epoch": 0.18, + "learning_rate": 3.4899999999999996e-07, + "loss": 0.2128, + "step": 700 + }, + { + "epoch": 0.19, + "learning_rate": 3.6149999999999995e-07, + "loss": 0.1647, + "step": 725 + }, + { + "epoch": 0.19, + "learning_rate": 3.74e-07, + "loss": 0.2473, + "step": 750 + }, + { + "epoch": 0.2, + "learning_rate": 3.8649999999999997e-07, + "loss": 0.1415, + "step": 775 + }, + { + "epoch": 0.21, + "learning_rate": 3.99e-07, + "loss": 0.2367, + "step": 800 + }, + { + "epoch": 0.21, + "learning_rate": 4.1149999999999995e-07, + "loss": 0.1498, + "step": 825 + }, + { + "epoch": 0.22, + "learning_rate": 4.24e-07, + "loss": 0.2161, + "step": 850 + }, + { + "epoch": 0.23, + "learning_rate": 4.3649999999999997e-07, + "loss": 0.144, + "step": 875 + }, + { + "epoch": 0.23, + "learning_rate": 4.49e-07, + "loss": 0.2316, + "step": 900 + }, + { + "epoch": 0.24, + "learning_rate": 4.615e-07, + "loss": 0.1449, + "step": 925 + }, + { + "epoch": 0.25, + "learning_rate": 4.7399999999999993e-07, + "loss": 0.1513, + "step": 950 + }, + { + "epoch": 0.25, + "learning_rate": 4.864999999999999e-07, + "loss": 0.1855, + "step": 975 + }, + { + "epoch": 0.26, + "learning_rate": 4.99e-07, + "loss": 0.199, + "step": 1000 + }, + { + "epoch": 0.26, + "eval_loss": 0.15628743171691895, + "eval_runtime": 184706.1006, + "eval_samples_per_second": 0.051, + "eval_steps_per_second": 0.006, + "eval_wer": 0.1124272786037492, + "step": 1000 + }, + { + "epoch": 0.26, + "learning_rate": 5.114999999999999e-07, + "loss": 0.1301, + "step": 1025 + }, + { + "epoch": 0.27, + "learning_rate": 5.24e-07, + "loss": 0.2193, + "step": 1050 + }, + { + "epoch": 0.28, + "learning_rate": 5.365e-07, + "loss": 0.1552, + "step": 1075 + }, + { + "epoch": 0.28, + "learning_rate": 5.490000000000001e-07, + "loss": 0.1865, + "step": 1100 + }, + { + "epoch": 0.29, + "learning_rate": 5.614999999999999e-07, + "loss": 0.1618, + "step": 1125 + }, + { + "epoch": 0.3, + "learning_rate": 5.739999999999999e-07, + "loss": 0.2259, + "step": 1150 + }, + { + "epoch": 0.3, + "learning_rate": 5.865e-07, + "loss": 0.1418, + "step": 1175 + }, + { + "epoch": 0.31, + "learning_rate": 5.989999999999999e-07, + "loss": 0.1916, + "step": 1200 + }, + { + "epoch": 0.32, + "learning_rate": 6.115e-07, + "loss": 0.1295, + "step": 1225 + }, + { + "epoch": 0.32, + "learning_rate": 6.24e-07, + "loss": 0.2112, + "step": 1250 + }, + { + "epoch": 0.33, + "learning_rate": 6.365e-07, + "loss": 0.1231, + "step": 1275 + }, + { + "epoch": 0.34, + "learning_rate": 6.49e-07, + "loss": 0.1914, + "step": 1300 + }, + { + "epoch": 0.34, + "learning_rate": 6.614999999999999e-07, + "loss": 0.1485, + "step": 1325 + }, + { + "epoch": 0.35, + "learning_rate": 6.74e-07, + "loss": 0.1958, + "step": 1350 + }, + { + "epoch": 0.35, + "learning_rate": 6.865e-07, + "loss": 0.1452, + "step": 1375 + }, + { + "epoch": 0.36, + "learning_rate": 6.989999999999999e-07, + "loss": 0.1624, + "step": 1400 + }, + { + "epoch": 0.37, + "learning_rate": 7.115e-07, + "loss": 0.1518, + "step": 1425 + }, + { + "epoch": 0.37, + "learning_rate": 7.24e-07, + "loss": 0.1935, + "step": 1450 + }, + { + "epoch": 0.38, + "learning_rate": 7.365e-07, + "loss": 0.1138, + "step": 1475 + }, + { + "epoch": 0.39, + "learning_rate": 7.489999999999999e-07, + "loss": 0.16, + "step": 1500 + }, + { + "epoch": 0.39, + "learning_rate": 7.614999999999999e-07, + "loss": 0.1279, + "step": 1525 + }, + { + "epoch": 0.4, + "learning_rate": 7.74e-07, + "loss": 0.1862, + "step": 1550 + }, + { + "epoch": 0.41, + "learning_rate": 7.864999999999999e-07, + "loss": 0.1537, + "step": 1575 + }, + { + "epoch": 0.41, + "learning_rate": 7.99e-07, + "loss": 0.1915, + "step": 1600 + }, + { + "epoch": 0.42, + "learning_rate": 8.115e-07, + "loss": 0.1303, + "step": 1625 + }, + { + "epoch": 0.43, + "learning_rate": 8.24e-07, + "loss": 0.1967, + "step": 1650 + }, + { + "epoch": 0.43, + "learning_rate": 8.365e-07, + "loss": 0.1418, + "step": 1675 + }, + { + "epoch": 0.44, + "learning_rate": 8.489999999999999e-07, + "loss": 0.194, + "step": 1700 + }, + { + "epoch": 0.45, + "learning_rate": 8.615e-07, + "loss": 0.1377, + "step": 1725 + }, + { + "epoch": 0.45, + "learning_rate": 8.739999999999999e-07, + "loss": 0.1534, + "step": 1750 + }, + { + "epoch": 0.46, + "learning_rate": 8.864999999999999e-07, + "loss": 0.1416, + "step": 1775 + }, + { + "epoch": 0.46, + "learning_rate": 8.99e-07, + "loss": 0.1869, + "step": 1800 + }, + { + "epoch": 0.47, + "learning_rate": 9.115e-07, + "loss": 0.1507, + "step": 1825 + }, + { + "epoch": 0.48, + "learning_rate": 9.24e-07, + "loss": 0.1571, + "step": 1850 + }, + { + "epoch": 0.48, + "learning_rate": 9.365e-07, + "loss": 0.1348, + "step": 1875 + }, + { + "epoch": 0.49, + "learning_rate": 9.489999999999999e-07, + "loss": 0.1981, + "step": 1900 + }, + { + "epoch": 0.5, + "learning_rate": 9.615e-07, + "loss": 0.1324, + "step": 1925 + }, + { + "epoch": 0.5, + "learning_rate": 9.74e-07, + "loss": 0.1712, + "step": 1950 + }, + { + "epoch": 0.51, + "learning_rate": 9.865e-07, + "loss": 0.1169, + "step": 1975 + }, + { + "epoch": 0.52, + "learning_rate": 9.989999999999999e-07, + "loss": 0.1654, + "step": 2000 + }, + { + "epoch": 0.52, + "eval_loss": 0.1500108540058136, + "eval_runtime": 188234.7285, + "eval_samples_per_second": 0.05, + "eval_steps_per_second": 0.006, + "eval_wer": 0.10521978021978022, + "step": 2000 + } + ], + "logging_steps": 25, + "max_steps": 5000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 1000, + "total_flos": 5.435997290496e+19, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-2000/training_args.bin b/checkpoint-2000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f9b2ac6d109ba135c37e8de1802d3dfa05ab0bb5 Binary files /dev/null and b/checkpoint-2000/training_args.bin differ diff --git a/checkpoint-3000/config.json b/checkpoint-3000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..caa8aea5ccb9a4c479137c1f0ded8dc3bcc33fb2 --- /dev/null +++ b/checkpoint-3000/config.json @@ -0,0 +1,51 @@ +{ + "_name_or_path": "openai/whisper-large-v3", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": true, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 1280, + "decoder_attention_heads": 20, + "decoder_ffn_dim": 5120, + "decoder_layerdrop": 0.0, + "decoder_layers": 32, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 20, + "encoder_ffn_dim": 5120, + "encoder_layerdrop": 0.0, + "encoder_layers": 32, + "eos_token_id": 50257, + "forced_decoder_ids": null, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 32, + "num_mel_bins": 128, + "pad_token_id": 50256, + "scale_embedding": false, + "torch_dtype": "float32", + "transformers_version": "4.37.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51866 +} diff --git a/checkpoint-3000/generation_config.json b/checkpoint-3000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..9c184c23d34e10a8eb965f82773d9f8e3fb90c3d --- /dev/null +++ b/checkpoint-3000/generation_config.json @@ -0,0 +1,264 @@ +{ + "alignment_heads": [ + [ + 7, + 0 + ], + [ + 10, + 17 + ], + [ + 12, + 18 + ], + [ + 13, + 12 + ], + [ + 16, + 1 + ], + [ + 17, + 14 + ], + [ + 19, + 11 + ], + [ + 21, + 4 + ], + [ + 24, + 1 + ], + [ + 25, + 6 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50360 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|yue|>": 50358, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 1, + "max_length": 448, + "no_timestamps_token_id": 50364, + "pad_token_id": 50257, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50359, + 50360, + 50361, + 50362, + 50363 + ], + "task_to_id": { + "transcribe": 50360, + "translate": 50359 + }, + "transformers_version": "4.37.0.dev0" +} diff --git a/checkpoint-3000/model-00001-of-00002.safetensors b/checkpoint-3000/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..48abeb97342dc287671ab8939818dcd3a2068f9f --- /dev/null +++ b/checkpoint-3000/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f878789c48bcae8bdb738a21db184b61ea25d207190b7a28b1886fd661820964 +size 4993448880 diff --git a/checkpoint-3000/model-00002-of-00002.safetensors b/checkpoint-3000/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5e22e7b0291e3a54a8a4dac84bf7b5ba5c977fa6 --- /dev/null +++ b/checkpoint-3000/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cebf488a1c5179e1630e18b09c98e0fc49f633401cd2860996acc46e8ce31123 +size 1180663192 diff --git a/checkpoint-3000/model.safetensors.index.json b/checkpoint-3000/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..c40d652c9b765dc6b1f8a90b16063b93b7b5c888 --- /dev/null +++ b/checkpoint-3000/model.safetensors.index.json @@ -0,0 +1,1266 @@ +{ + "metadata": { + "total_size": 6173962240 + }, + "weight_map": { + "model.decoder.embed_positions.weight": "model-00001-of-00002.safetensors", + "model.decoder.embed_tokens.weight": "model-00001-of-00002.safetensors", + "model.decoder.layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.conv1.bias": "model-00001-of-00002.safetensors", + "model.encoder.conv1.weight": "model-00001-of-00002.safetensors", + "model.encoder.conv2.bias": "model-00001-of-00002.safetensors", + "model.encoder.conv2.weight": "model-00001-of-00002.safetensors", + "model.encoder.embed_positions.weight": "model-00001-of-00002.safetensors", + "model.encoder.layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors" + } +} diff --git a/checkpoint-3000/optimizer.pt b/checkpoint-3000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6a5b2845f886a012b9e8966befc8fd0a50218a02 --- /dev/null +++ b/checkpoint-3000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b881cb7e883293384811c41e69a3af5ab73194ee3fd9c7fc959db40efbb8dce +size 12333660476 diff --git a/checkpoint-3000/preprocessor_config.json b/checkpoint-3000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..931c77a740890c46365c7ae0c9d350ba3cca908f --- /dev/null +++ b/checkpoint-3000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 128, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-3000/rng_state.pth b/checkpoint-3000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..843248eeeac0b73ed09791b65fc502facef80e89 Binary files /dev/null and b/checkpoint-3000/rng_state.pth differ diff --git a/checkpoint-3000/scheduler.pt b/checkpoint-3000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0dab76b6f4d51ae75eff90b97093c0661c1ef541 Binary files /dev/null and b/checkpoint-3000/scheduler.pt differ diff --git a/checkpoint-3000/trainer_state.json b/checkpoint-3000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..45c5deb81b9903f9b7f2cbdf1450c03bcfeb6b4c --- /dev/null +++ b/checkpoint-3000/trainer_state.json @@ -0,0 +1,768 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.7741935483870968, + "eval_steps": 1000, + "global_step": 3000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 1.1499999999999999e-08, + "loss": 0.7822, + "step": 25 + }, + { + "epoch": 0.01, + "learning_rate": 2.4e-08, + "loss": 1.3149, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 3.6499999999999996e-08, + "loss": 0.7809, + "step": 75 + }, + { + "epoch": 0.03, + "learning_rate": 4.9e-08, + "loss": 1.2916, + "step": 100 + }, + { + "epoch": 0.03, + "learning_rate": 6.15e-08, + "loss": 0.7645, + "step": 125 + }, + { + "epoch": 0.04, + "learning_rate": 7.399999999999999e-08, + "loss": 1.3379, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 8.649999999999999e-08, + "loss": 0.7722, + "step": 175 + }, + { + "epoch": 0.05, + "learning_rate": 9.9e-08, + "loss": 1.281, + "step": 200 + }, + { + "epoch": 0.06, + "learning_rate": 1.115e-07, + "loss": 0.6998, + "step": 225 + }, + { + "epoch": 0.06, + "learning_rate": 1.24e-07, + "loss": 1.1532, + "step": 250 + }, + { + "epoch": 0.07, + "learning_rate": 1.365e-07, + "loss": 0.583, + "step": 275 + }, + { + "epoch": 0.08, + "learning_rate": 1.49e-07, + "loss": 0.6693, + "step": 300 + }, + { + "epoch": 0.08, + "learning_rate": 1.615e-07, + "loss": 0.3395, + "step": 325 + }, + { + "epoch": 0.09, + "learning_rate": 1.7399999999999997e-07, + "loss": 0.3597, + "step": 350 + }, + { + "epoch": 0.1, + "learning_rate": 1.8649999999999998e-07, + "loss": 0.22, + "step": 375 + }, + { + "epoch": 0.1, + "learning_rate": 1.99e-07, + "loss": 0.3449, + "step": 400 + }, + { + "epoch": 0.11, + "learning_rate": 2.1149999999999998e-07, + "loss": 0.2123, + "step": 425 + }, + { + "epoch": 0.12, + "learning_rate": 2.24e-07, + "loss": 0.3362, + "step": 450 + }, + { + "epoch": 0.12, + "learning_rate": 2.3649999999999998e-07, + "loss": 0.175, + "step": 475 + }, + { + "epoch": 0.13, + "learning_rate": 2.4899999999999997e-07, + "loss": 0.3051, + "step": 500 + }, + { + "epoch": 0.14, + "learning_rate": 2.615e-07, + "loss": 0.176, + "step": 525 + }, + { + "epoch": 0.14, + "learning_rate": 2.74e-07, + "loss": 0.2517, + "step": 550 + }, + { + "epoch": 0.15, + "learning_rate": 2.865e-07, + "loss": 0.1863, + "step": 575 + }, + { + "epoch": 0.15, + "learning_rate": 2.9899999999999996e-07, + "loss": 0.2614, + "step": 600 + }, + { + "epoch": 0.16, + "learning_rate": 3.115e-07, + "loss": 0.1942, + "step": 625 + }, + { + "epoch": 0.17, + "learning_rate": 3.24e-07, + "loss": 0.2901, + "step": 650 + }, + { + "epoch": 0.17, + "learning_rate": 3.3650000000000003e-07, + "loss": 0.1587, + "step": 675 + }, + { + "epoch": 0.18, + "learning_rate": 3.4899999999999996e-07, + "loss": 0.2128, + "step": 700 + }, + { + "epoch": 0.19, + "learning_rate": 3.6149999999999995e-07, + "loss": 0.1647, + "step": 725 + }, + { + "epoch": 0.19, + "learning_rate": 3.74e-07, + "loss": 0.2473, + "step": 750 + }, + { + "epoch": 0.2, + "learning_rate": 3.8649999999999997e-07, + "loss": 0.1415, + "step": 775 + }, + { + "epoch": 0.21, + "learning_rate": 3.99e-07, + "loss": 0.2367, + "step": 800 + }, + { + "epoch": 0.21, + "learning_rate": 4.1149999999999995e-07, + "loss": 0.1498, + "step": 825 + }, + { + "epoch": 0.22, + "learning_rate": 4.24e-07, + "loss": 0.2161, + "step": 850 + }, + { + "epoch": 0.23, + "learning_rate": 4.3649999999999997e-07, + "loss": 0.144, + "step": 875 + }, + { + "epoch": 0.23, + "learning_rate": 4.49e-07, + "loss": 0.2316, + "step": 900 + }, + { + "epoch": 0.24, + "learning_rate": 4.615e-07, + "loss": 0.1449, + "step": 925 + }, + { + "epoch": 0.25, + "learning_rate": 4.7399999999999993e-07, + "loss": 0.1513, + "step": 950 + }, + { + "epoch": 0.25, + "learning_rate": 4.864999999999999e-07, + "loss": 0.1855, + "step": 975 + }, + { + "epoch": 0.26, + "learning_rate": 4.99e-07, + "loss": 0.199, + "step": 1000 + }, + { + "epoch": 0.26, + "eval_loss": 0.15628743171691895, + "eval_runtime": 184706.1006, + "eval_samples_per_second": 0.051, + "eval_steps_per_second": 0.006, + "eval_wer": 0.1124272786037492, + "step": 1000 + }, + { + "epoch": 0.26, + "learning_rate": 5.114999999999999e-07, + "loss": 0.1301, + "step": 1025 + }, + { + "epoch": 0.27, + "learning_rate": 5.24e-07, + "loss": 0.2193, + "step": 1050 + }, + { + "epoch": 0.28, + "learning_rate": 5.365e-07, + "loss": 0.1552, + "step": 1075 + }, + { + "epoch": 0.28, + "learning_rate": 5.490000000000001e-07, + "loss": 0.1865, + "step": 1100 + }, + { + "epoch": 0.29, + "learning_rate": 5.614999999999999e-07, + "loss": 0.1618, + "step": 1125 + }, + { + "epoch": 0.3, + "learning_rate": 5.739999999999999e-07, + "loss": 0.2259, + "step": 1150 + }, + { + "epoch": 0.3, + "learning_rate": 5.865e-07, + "loss": 0.1418, + "step": 1175 + }, + { + "epoch": 0.31, + "learning_rate": 5.989999999999999e-07, + "loss": 0.1916, + "step": 1200 + }, + { + "epoch": 0.32, + "learning_rate": 6.115e-07, + "loss": 0.1295, + "step": 1225 + }, + { + "epoch": 0.32, + "learning_rate": 6.24e-07, + "loss": 0.2112, + "step": 1250 + }, + { + "epoch": 0.33, + "learning_rate": 6.365e-07, + "loss": 0.1231, + "step": 1275 + }, + { + "epoch": 0.34, + "learning_rate": 6.49e-07, + "loss": 0.1914, + "step": 1300 + }, + { + "epoch": 0.34, + "learning_rate": 6.614999999999999e-07, + "loss": 0.1485, + "step": 1325 + }, + { + "epoch": 0.35, + "learning_rate": 6.74e-07, + "loss": 0.1958, + "step": 1350 + }, + { + "epoch": 0.35, + "learning_rate": 6.865e-07, + "loss": 0.1452, + "step": 1375 + }, + { + "epoch": 0.36, + "learning_rate": 6.989999999999999e-07, + "loss": 0.1624, + "step": 1400 + }, + { + "epoch": 0.37, + "learning_rate": 7.115e-07, + "loss": 0.1518, + "step": 1425 + }, + { + "epoch": 0.37, + "learning_rate": 7.24e-07, + "loss": 0.1935, + "step": 1450 + }, + { + "epoch": 0.38, + "learning_rate": 7.365e-07, + "loss": 0.1138, + "step": 1475 + }, + { + "epoch": 0.39, + "learning_rate": 7.489999999999999e-07, + "loss": 0.16, + "step": 1500 + }, + { + "epoch": 0.39, + "learning_rate": 7.614999999999999e-07, + "loss": 0.1279, + "step": 1525 + }, + { + "epoch": 0.4, + "learning_rate": 7.74e-07, + "loss": 0.1862, + "step": 1550 + }, + { + "epoch": 0.41, + "learning_rate": 7.864999999999999e-07, + "loss": 0.1537, + "step": 1575 + }, + { + "epoch": 0.41, + "learning_rate": 7.99e-07, + "loss": 0.1915, + "step": 1600 + }, + { + "epoch": 0.42, + "learning_rate": 8.115e-07, + "loss": 0.1303, + "step": 1625 + }, + { + "epoch": 0.43, + "learning_rate": 8.24e-07, + "loss": 0.1967, + "step": 1650 + }, + { + "epoch": 0.43, + "learning_rate": 8.365e-07, + "loss": 0.1418, + "step": 1675 + }, + { + "epoch": 0.44, + "learning_rate": 8.489999999999999e-07, + "loss": 0.194, + "step": 1700 + }, + { + "epoch": 0.45, + "learning_rate": 8.615e-07, + "loss": 0.1377, + "step": 1725 + }, + { + "epoch": 0.45, + "learning_rate": 8.739999999999999e-07, + "loss": 0.1534, + "step": 1750 + }, + { + "epoch": 0.46, + "learning_rate": 8.864999999999999e-07, + "loss": 0.1416, + "step": 1775 + }, + { + "epoch": 0.46, + "learning_rate": 8.99e-07, + "loss": 0.1869, + "step": 1800 + }, + { + "epoch": 0.47, + "learning_rate": 9.115e-07, + "loss": 0.1507, + "step": 1825 + }, + { + "epoch": 0.48, + "learning_rate": 9.24e-07, + "loss": 0.1571, + "step": 1850 + }, + { + "epoch": 0.48, + "learning_rate": 9.365e-07, + "loss": 0.1348, + "step": 1875 + }, + { + "epoch": 0.49, + "learning_rate": 9.489999999999999e-07, + "loss": 0.1981, + "step": 1900 + }, + { + "epoch": 0.5, + "learning_rate": 9.615e-07, + "loss": 0.1324, + "step": 1925 + }, + { + "epoch": 0.5, + "learning_rate": 9.74e-07, + "loss": 0.1712, + "step": 1950 + }, + { + "epoch": 0.51, + "learning_rate": 9.865e-07, + "loss": 0.1169, + "step": 1975 + }, + { + "epoch": 0.52, + "learning_rate": 9.989999999999999e-07, + "loss": 0.1654, + "step": 2000 + }, + { + "epoch": 0.52, + "eval_loss": 0.1500108540058136, + "eval_runtime": 188234.7285, + "eval_samples_per_second": 0.05, + "eval_steps_per_second": 0.006, + "eval_wer": 0.10521978021978022, + "step": 2000 + }, + { + "epoch": 0.52, + "learning_rate": 9.923333333333332e-07, + "loss": 0.1202, + "step": 2025 + }, + { + "epoch": 0.53, + "learning_rate": 9.84e-07, + "loss": 0.186, + "step": 2050 + }, + { + "epoch": 0.54, + "learning_rate": 9.756666666666666e-07, + "loss": 0.1106, + "step": 2075 + }, + { + "epoch": 0.54, + "learning_rate": 9.673333333333332e-07, + "loss": 0.1926, + "step": 2100 + }, + { + "epoch": 0.55, + "learning_rate": 9.589999999999998e-07, + "loss": 0.1178, + "step": 2125 + }, + { + "epoch": 0.55, + "learning_rate": 9.506666666666667e-07, + "loss": 0.1744, + "step": 2150 + }, + { + "epoch": 0.56, + "learning_rate": 9.423333333333333e-07, + "loss": 0.1263, + "step": 2175 + }, + { + "epoch": 0.57, + "learning_rate": 9.34e-07, + "loss": 0.1818, + "step": 2200 + }, + { + "epoch": 0.57, + "learning_rate": 9.256666666666666e-07, + "loss": 0.1466, + "step": 2225 + }, + { + "epoch": 0.58, + "learning_rate": 9.173333333333333e-07, + "loss": 0.1357, + "step": 2250 + }, + { + "epoch": 0.59, + "learning_rate": 9.09e-07, + "loss": 0.148, + "step": 2275 + }, + { + "epoch": 0.59, + "learning_rate": 9.006666666666666e-07, + "loss": 0.1684, + "step": 2300 + }, + { + "epoch": 0.6, + "learning_rate": 8.923333333333333e-07, + "loss": 0.1368, + "step": 2325 + }, + { + "epoch": 0.61, + "learning_rate": 8.839999999999999e-07, + "loss": 0.188, + "step": 2350 + }, + { + "epoch": 0.61, + "learning_rate": 8.756666666666666e-07, + "loss": 0.1299, + "step": 2375 + }, + { + "epoch": 0.62, + "learning_rate": 8.673333333333332e-07, + "loss": 0.1461, + "step": 2400 + }, + { + "epoch": 0.63, + "learning_rate": 8.59e-07, + "loss": 0.1569, + "step": 2425 + }, + { + "epoch": 0.63, + "learning_rate": 8.506666666666667e-07, + "loss": 0.1527, + "step": 2450 + }, + { + "epoch": 0.64, + "learning_rate": 8.423333333333334e-07, + "loss": 0.1041, + "step": 2475 + }, + { + "epoch": 0.65, + "learning_rate": 8.34e-07, + "loss": 0.157, + "step": 2500 + }, + { + "epoch": 0.65, + "learning_rate": 8.256666666666666e-07, + "loss": 0.1364, + "step": 2525 + }, + { + "epoch": 0.66, + "learning_rate": 8.173333333333333e-07, + "loss": 0.1685, + "step": 2550 + }, + { + "epoch": 0.66, + "learning_rate": 8.09e-07, + "loss": 0.1344, + "step": 2575 + }, + { + "epoch": 0.67, + "learning_rate": 8.006666666666666e-07, + "loss": 0.1589, + "step": 2600 + }, + { + "epoch": 0.68, + "learning_rate": 7.923333333333333e-07, + "loss": 0.1334, + "step": 2625 + }, + { + "epoch": 0.68, + "learning_rate": 7.84e-07, + "loss": 0.1297, + "step": 2650 + }, + { + "epoch": 0.69, + "learning_rate": 7.756666666666665e-07, + "loss": 0.1326, + "step": 2675 + }, + { + "epoch": 0.7, + "learning_rate": 7.673333333333332e-07, + "loss": 0.179, + "step": 2700 + }, + { + "epoch": 0.7, + "learning_rate": 7.59e-07, + "loss": 0.1242, + "step": 2725 + }, + { + "epoch": 0.71, + "learning_rate": 7.506666666666667e-07, + "loss": 0.1714, + "step": 2750 + }, + { + "epoch": 0.72, + "learning_rate": 7.423333333333333e-07, + "loss": 0.1503, + "step": 2775 + }, + { + "epoch": 0.72, + "learning_rate": 7.34e-07, + "loss": 0.131, + "step": 2800 + }, + { + "epoch": 0.73, + "learning_rate": 7.256666666666667e-07, + "loss": 0.118, + "step": 2825 + }, + { + "epoch": 0.74, + "learning_rate": 7.173333333333333e-07, + "loss": 0.1729, + "step": 2850 + }, + { + "epoch": 0.74, + "learning_rate": 7.089999999999999e-07, + "loss": 0.1193, + "step": 2875 + }, + { + "epoch": 0.75, + "learning_rate": 7.006666666666666e-07, + "loss": 0.1723, + "step": 2900 + }, + { + "epoch": 0.75, + "learning_rate": 6.923333333333333e-07, + "loss": 0.1393, + "step": 2925 + }, + { + "epoch": 0.76, + "learning_rate": 6.84e-07, + "loss": 0.1259, + "step": 2950 + }, + { + "epoch": 0.77, + "learning_rate": 6.756666666666666e-07, + "loss": 0.1228, + "step": 2975 + }, + { + "epoch": 0.77, + "learning_rate": 6.673333333333334e-07, + "loss": 0.1794, + "step": 3000 + }, + { + "epoch": 0.77, + "eval_loss": 0.13788852095603943, + "eval_runtime": 198193.9708, + "eval_samples_per_second": 0.047, + "eval_steps_per_second": 0.006, + "eval_wer": 0.09970911441499677, + "step": 3000 + } + ], + "logging_steps": 25, + "max_steps": 5000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 1000, + "total_flos": 8.153995935744e+19, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-3000/training_args.bin b/checkpoint-3000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f9b2ac6d109ba135c37e8de1802d3dfa05ab0bb5 Binary files /dev/null and b/checkpoint-3000/training_args.bin differ diff --git a/checkpoint-4000/config.json b/checkpoint-4000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..caa8aea5ccb9a4c479137c1f0ded8dc3bcc33fb2 --- /dev/null +++ b/checkpoint-4000/config.json @@ -0,0 +1,51 @@ +{ + "_name_or_path": "openai/whisper-large-v3", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": true, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 1280, + "decoder_attention_heads": 20, + "decoder_ffn_dim": 5120, + "decoder_layerdrop": 0.0, + "decoder_layers": 32, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 20, + "encoder_ffn_dim": 5120, + "encoder_layerdrop": 0.0, + "encoder_layers": 32, + "eos_token_id": 50257, + "forced_decoder_ids": null, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 32, + "num_mel_bins": 128, + "pad_token_id": 50256, + "scale_embedding": false, + "torch_dtype": "float32", + "transformers_version": "4.37.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51866 +} diff --git a/checkpoint-4000/generation_config.json b/checkpoint-4000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..cea34a613684b7726186da2813a3ecb580705460 --- /dev/null +++ b/checkpoint-4000/generation_config.json @@ -0,0 +1,265 @@ +{ + "alignment_heads": [ + [ + 7, + 0 + ], + [ + 10, + 17 + ], + [ + 12, + 18 + ], + [ + 13, + 12 + ], + [ + 16, + 1 + ], + [ + 17, + 14 + ], + [ + 19, + 11 + ], + [ + 21, + 4 + ], + [ + 24, + 1 + ], + [ + 25, + 6 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|yue|>": 50358, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50364, + "pad_token_id": 50257, + "prev_sot_token_id": 50362, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50359, + 50360, + 50361, + 50362, + 50363 + ], + "task_to_id": { + "transcribe": 50360, + "translate": 50359 + }, + "transformers_version": "4.37.0.dev0" +} diff --git a/checkpoint-4000/model-00001-of-00002.safetensors b/checkpoint-4000/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2cc9c49f461f301f3d100a0243f0f354aa4f6c82 --- /dev/null +++ b/checkpoint-4000/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fedb9cc896a6388bb4f5cddb373b7392782ec965512769af969c60e1af1a4e14 +size 4993448880 diff --git a/checkpoint-4000/model-00002-of-00002.safetensors b/checkpoint-4000/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2f7cf5a93c975accb476a84757532fee9168d3a5 --- /dev/null +++ b/checkpoint-4000/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c6b0edf383dba5a20a86c58366da587af2b11f57f3238809e22174428275ba2 +size 1180663192 diff --git a/checkpoint-4000/model.safetensors.index.json b/checkpoint-4000/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..c40d652c9b765dc6b1f8a90b16063b93b7b5c888 --- /dev/null +++ b/checkpoint-4000/model.safetensors.index.json @@ -0,0 +1,1266 @@ +{ + "metadata": { + "total_size": 6173962240 + }, + "weight_map": { + "model.decoder.embed_positions.weight": "model-00001-of-00002.safetensors", + "model.decoder.embed_tokens.weight": "model-00001-of-00002.safetensors", + "model.decoder.layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.conv1.bias": "model-00001-of-00002.safetensors", + "model.encoder.conv1.weight": "model-00001-of-00002.safetensors", + "model.encoder.conv2.bias": "model-00001-of-00002.safetensors", + "model.encoder.conv2.weight": "model-00001-of-00002.safetensors", + "model.encoder.embed_positions.weight": "model-00001-of-00002.safetensors", + "model.encoder.layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors" + } +} diff --git a/checkpoint-4000/optimizer.pt b/checkpoint-4000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..753dc7c457199a95ad495dbc075b876c88c0dc7f --- /dev/null +++ b/checkpoint-4000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80e4f108eb5557c44d32434b4917e37c96c5a7f16fb94640266fc8260e5fd15e +size 12333660476 diff --git a/checkpoint-4000/preprocessor_config.json b/checkpoint-4000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..931c77a740890c46365c7ae0c9d350ba3cca908f --- /dev/null +++ b/checkpoint-4000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 128, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-4000/rng_state.pth b/checkpoint-4000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..d308ddd3e170f9a065973619c69588d354b17428 Binary files /dev/null and b/checkpoint-4000/rng_state.pth differ diff --git a/checkpoint-4000/scheduler.pt b/checkpoint-4000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..7957b6d08765ac2c37d8acb447251265daccbacb Binary files /dev/null and b/checkpoint-4000/scheduler.pt differ diff --git a/checkpoint-4000/trainer_state.json b/checkpoint-4000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e94ed50cd002bc6fa2d9bbd01625b4414e209ea4 --- /dev/null +++ b/checkpoint-4000/trainer_state.json @@ -0,0 +1,1017 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.032258064516129, + "eval_steps": 1000, + "global_step": 4000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 1.1499999999999999e-08, + "loss": 0.7822, + "step": 25 + }, + { + "epoch": 0.01, + "learning_rate": 2.4e-08, + "loss": 1.3149, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 3.6499999999999996e-08, + "loss": 0.7809, + "step": 75 + }, + { + "epoch": 0.03, + "learning_rate": 4.9e-08, + "loss": 1.2916, + "step": 100 + }, + { + "epoch": 0.03, + "learning_rate": 6.15e-08, + "loss": 0.7645, + "step": 125 + }, + { + "epoch": 0.04, + "learning_rate": 7.399999999999999e-08, + "loss": 1.3379, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 8.649999999999999e-08, + "loss": 0.7722, + "step": 175 + }, + { + "epoch": 0.05, + "learning_rate": 9.9e-08, + "loss": 1.281, + "step": 200 + }, + { + "epoch": 0.06, + "learning_rate": 1.115e-07, + "loss": 0.6998, + "step": 225 + }, + { + "epoch": 0.06, + "learning_rate": 1.24e-07, + "loss": 1.1532, + "step": 250 + }, + { + "epoch": 0.07, + "learning_rate": 1.365e-07, + "loss": 0.583, + "step": 275 + }, + { + "epoch": 0.08, + "learning_rate": 1.49e-07, + "loss": 0.6693, + "step": 300 + }, + { + "epoch": 0.08, + "learning_rate": 1.615e-07, + "loss": 0.3395, + "step": 325 + }, + { + "epoch": 0.09, + "learning_rate": 1.7399999999999997e-07, + "loss": 0.3597, + "step": 350 + }, + { + "epoch": 0.1, + "learning_rate": 1.8649999999999998e-07, + "loss": 0.22, + "step": 375 + }, + { + "epoch": 0.1, + "learning_rate": 1.99e-07, + "loss": 0.3449, + "step": 400 + }, + { + "epoch": 0.11, + "learning_rate": 2.1149999999999998e-07, + "loss": 0.2123, + "step": 425 + }, + { + "epoch": 0.12, + "learning_rate": 2.24e-07, + "loss": 0.3362, + "step": 450 + }, + { + "epoch": 0.12, + "learning_rate": 2.3649999999999998e-07, + "loss": 0.175, + "step": 475 + }, + { + "epoch": 0.13, + "learning_rate": 2.4899999999999997e-07, + "loss": 0.3051, + "step": 500 + }, + { + "epoch": 0.14, + "learning_rate": 2.615e-07, + "loss": 0.176, + "step": 525 + }, + { + "epoch": 0.14, + "learning_rate": 2.74e-07, + "loss": 0.2517, + "step": 550 + }, + { + "epoch": 0.15, + "learning_rate": 2.865e-07, + "loss": 0.1863, + "step": 575 + }, + { + "epoch": 0.15, + "learning_rate": 2.9899999999999996e-07, + "loss": 0.2614, + "step": 600 + }, + { + "epoch": 0.16, + "learning_rate": 3.115e-07, + "loss": 0.1942, + "step": 625 + }, + { + "epoch": 0.17, + "learning_rate": 3.24e-07, + "loss": 0.2901, + "step": 650 + }, + { + "epoch": 0.17, + "learning_rate": 3.3650000000000003e-07, + "loss": 0.1587, + "step": 675 + }, + { + "epoch": 0.18, + "learning_rate": 3.4899999999999996e-07, + "loss": 0.2128, + "step": 700 + }, + { + "epoch": 0.19, + "learning_rate": 3.6149999999999995e-07, + "loss": 0.1647, + "step": 725 + }, + { + "epoch": 0.19, + "learning_rate": 3.74e-07, + "loss": 0.2473, + "step": 750 + }, + { + "epoch": 0.2, + "learning_rate": 3.8649999999999997e-07, + "loss": 0.1415, + "step": 775 + }, + { + "epoch": 0.21, + "learning_rate": 3.99e-07, + "loss": 0.2367, + "step": 800 + }, + { + "epoch": 0.21, + "learning_rate": 4.1149999999999995e-07, + "loss": 0.1498, + "step": 825 + }, + { + "epoch": 0.22, + "learning_rate": 4.24e-07, + "loss": 0.2161, + "step": 850 + }, + { + "epoch": 0.23, + "learning_rate": 4.3649999999999997e-07, + "loss": 0.144, + "step": 875 + }, + { + "epoch": 0.23, + "learning_rate": 4.49e-07, + "loss": 0.2316, + "step": 900 + }, + { + "epoch": 0.24, + "learning_rate": 4.615e-07, + "loss": 0.1449, + "step": 925 + }, + { + "epoch": 0.25, + "learning_rate": 4.7399999999999993e-07, + "loss": 0.1513, + "step": 950 + }, + { + "epoch": 0.25, + "learning_rate": 4.864999999999999e-07, + "loss": 0.1855, + "step": 975 + }, + { + "epoch": 0.26, + "learning_rate": 4.99e-07, + "loss": 0.199, + "step": 1000 + }, + { + "epoch": 0.26, + "eval_loss": 0.15628743171691895, + "eval_runtime": 184706.1006, + "eval_samples_per_second": 0.051, + "eval_steps_per_second": 0.006, + "eval_wer": 0.1124272786037492, + "step": 1000 + }, + { + "epoch": 0.26, + "learning_rate": 5.114999999999999e-07, + "loss": 0.1301, + "step": 1025 + }, + { + "epoch": 0.27, + "learning_rate": 5.24e-07, + "loss": 0.2193, + "step": 1050 + }, + { + "epoch": 0.28, + "learning_rate": 5.365e-07, + "loss": 0.1552, + "step": 1075 + }, + { + "epoch": 0.28, + "learning_rate": 5.490000000000001e-07, + "loss": 0.1865, + "step": 1100 + }, + { + "epoch": 0.29, + "learning_rate": 5.614999999999999e-07, + "loss": 0.1618, + "step": 1125 + }, + { + "epoch": 0.3, + "learning_rate": 5.739999999999999e-07, + "loss": 0.2259, + "step": 1150 + }, + { + "epoch": 0.3, + "learning_rate": 5.865e-07, + "loss": 0.1418, + "step": 1175 + }, + { + "epoch": 0.31, + "learning_rate": 5.989999999999999e-07, + "loss": 0.1916, + "step": 1200 + }, + { + "epoch": 0.32, + "learning_rate": 6.115e-07, + "loss": 0.1295, + "step": 1225 + }, + { + "epoch": 0.32, + "learning_rate": 6.24e-07, + "loss": 0.2112, + "step": 1250 + }, + { + "epoch": 0.33, + "learning_rate": 6.365e-07, + "loss": 0.1231, + "step": 1275 + }, + { + "epoch": 0.34, + "learning_rate": 6.49e-07, + "loss": 0.1914, + "step": 1300 + }, + { + "epoch": 0.34, + "learning_rate": 6.614999999999999e-07, + "loss": 0.1485, + "step": 1325 + }, + { + "epoch": 0.35, + "learning_rate": 6.74e-07, + "loss": 0.1958, + "step": 1350 + }, + { + "epoch": 0.35, + "learning_rate": 6.865e-07, + "loss": 0.1452, + "step": 1375 + }, + { + "epoch": 0.36, + "learning_rate": 6.989999999999999e-07, + "loss": 0.1624, + "step": 1400 + }, + { + "epoch": 0.37, + "learning_rate": 7.115e-07, + "loss": 0.1518, + "step": 1425 + }, + { + "epoch": 0.37, + "learning_rate": 7.24e-07, + "loss": 0.1935, + "step": 1450 + }, + { + "epoch": 0.38, + "learning_rate": 7.365e-07, + "loss": 0.1138, + "step": 1475 + }, + { + "epoch": 0.39, + "learning_rate": 7.489999999999999e-07, + "loss": 0.16, + "step": 1500 + }, + { + "epoch": 0.39, + "learning_rate": 7.614999999999999e-07, + "loss": 0.1279, + "step": 1525 + }, + { + "epoch": 0.4, + "learning_rate": 7.74e-07, + "loss": 0.1862, + "step": 1550 + }, + { + "epoch": 0.41, + "learning_rate": 7.864999999999999e-07, + "loss": 0.1537, + "step": 1575 + }, + { + "epoch": 0.41, + "learning_rate": 7.99e-07, + "loss": 0.1915, + "step": 1600 + }, + { + "epoch": 0.42, + "learning_rate": 8.115e-07, + "loss": 0.1303, + "step": 1625 + }, + { + "epoch": 0.43, + "learning_rate": 8.24e-07, + "loss": 0.1967, + "step": 1650 + }, + { + "epoch": 0.43, + "learning_rate": 8.365e-07, + "loss": 0.1418, + "step": 1675 + }, + { + "epoch": 0.44, + "learning_rate": 8.489999999999999e-07, + "loss": 0.194, + "step": 1700 + }, + { + "epoch": 0.45, + "learning_rate": 8.615e-07, + "loss": 0.1377, + "step": 1725 + }, + { + "epoch": 0.45, + "learning_rate": 8.739999999999999e-07, + "loss": 0.1534, + "step": 1750 + }, + { + "epoch": 0.46, + "learning_rate": 8.864999999999999e-07, + "loss": 0.1416, + "step": 1775 + }, + { + "epoch": 0.46, + "learning_rate": 8.99e-07, + "loss": 0.1869, + "step": 1800 + }, + { + "epoch": 0.47, + "learning_rate": 9.115e-07, + "loss": 0.1507, + "step": 1825 + }, + { + "epoch": 0.48, + "learning_rate": 9.24e-07, + "loss": 0.1571, + "step": 1850 + }, + { + "epoch": 0.48, + "learning_rate": 9.365e-07, + "loss": 0.1348, + "step": 1875 + }, + { + "epoch": 0.49, + "learning_rate": 9.489999999999999e-07, + "loss": 0.1981, + "step": 1900 + }, + { + "epoch": 0.5, + "learning_rate": 9.615e-07, + "loss": 0.1324, + "step": 1925 + }, + { + "epoch": 0.5, + "learning_rate": 9.74e-07, + "loss": 0.1712, + "step": 1950 + }, + { + "epoch": 0.51, + "learning_rate": 9.865e-07, + "loss": 0.1169, + "step": 1975 + }, + { + "epoch": 0.52, + "learning_rate": 9.989999999999999e-07, + "loss": 0.1654, + "step": 2000 + }, + { + "epoch": 0.52, + "eval_loss": 0.1500108540058136, + "eval_runtime": 188234.7285, + "eval_samples_per_second": 0.05, + "eval_steps_per_second": 0.006, + "eval_wer": 0.10521978021978022, + "step": 2000 + }, + { + "epoch": 0.52, + "learning_rate": 9.923333333333332e-07, + "loss": 0.1202, + "step": 2025 + }, + { + "epoch": 0.53, + "learning_rate": 9.84e-07, + "loss": 0.186, + "step": 2050 + }, + { + "epoch": 0.54, + "learning_rate": 9.756666666666666e-07, + "loss": 0.1106, + "step": 2075 + }, + { + "epoch": 0.54, + "learning_rate": 9.673333333333332e-07, + "loss": 0.1926, + "step": 2100 + }, + { + "epoch": 0.55, + "learning_rate": 9.589999999999998e-07, + "loss": 0.1178, + "step": 2125 + }, + { + "epoch": 0.55, + "learning_rate": 9.506666666666667e-07, + "loss": 0.1744, + "step": 2150 + }, + { + "epoch": 0.56, + "learning_rate": 9.423333333333333e-07, + "loss": 0.1263, + "step": 2175 + }, + { + "epoch": 0.57, + "learning_rate": 9.34e-07, + "loss": 0.1818, + "step": 2200 + }, + { + "epoch": 0.57, + "learning_rate": 9.256666666666666e-07, + "loss": 0.1466, + "step": 2225 + }, + { + "epoch": 0.58, + "learning_rate": 9.173333333333333e-07, + "loss": 0.1357, + "step": 2250 + }, + { + "epoch": 0.59, + "learning_rate": 9.09e-07, + "loss": 0.148, + "step": 2275 + }, + { + "epoch": 0.59, + "learning_rate": 9.006666666666666e-07, + "loss": 0.1684, + "step": 2300 + }, + { + "epoch": 0.6, + "learning_rate": 8.923333333333333e-07, + "loss": 0.1368, + "step": 2325 + }, + { + "epoch": 0.61, + "learning_rate": 8.839999999999999e-07, + "loss": 0.188, + "step": 2350 + }, + { + "epoch": 0.61, + "learning_rate": 8.756666666666666e-07, + "loss": 0.1299, + "step": 2375 + }, + { + "epoch": 0.62, + "learning_rate": 8.673333333333332e-07, + "loss": 0.1461, + "step": 2400 + }, + { + "epoch": 0.63, + "learning_rate": 8.59e-07, + "loss": 0.1569, + "step": 2425 + }, + { + "epoch": 0.63, + "learning_rate": 8.506666666666667e-07, + "loss": 0.1527, + "step": 2450 + }, + { + "epoch": 0.64, + "learning_rate": 8.423333333333334e-07, + "loss": 0.1041, + "step": 2475 + }, + { + "epoch": 0.65, + "learning_rate": 8.34e-07, + "loss": 0.157, + "step": 2500 + }, + { + "epoch": 0.65, + "learning_rate": 8.256666666666666e-07, + "loss": 0.1364, + "step": 2525 + }, + { + "epoch": 0.66, + "learning_rate": 8.173333333333333e-07, + "loss": 0.1685, + "step": 2550 + }, + { + "epoch": 0.66, + "learning_rate": 8.09e-07, + "loss": 0.1344, + "step": 2575 + }, + { + "epoch": 0.67, + "learning_rate": 8.006666666666666e-07, + "loss": 0.1589, + "step": 2600 + }, + { + "epoch": 0.68, + "learning_rate": 7.923333333333333e-07, + "loss": 0.1334, + "step": 2625 + }, + { + "epoch": 0.68, + "learning_rate": 7.84e-07, + "loss": 0.1297, + "step": 2650 + }, + { + "epoch": 0.69, + "learning_rate": 7.756666666666665e-07, + "loss": 0.1326, + "step": 2675 + }, + { + "epoch": 0.7, + "learning_rate": 7.673333333333332e-07, + "loss": 0.179, + "step": 2700 + }, + { + "epoch": 0.7, + "learning_rate": 7.59e-07, + "loss": 0.1242, + "step": 2725 + }, + { + "epoch": 0.71, + "learning_rate": 7.506666666666667e-07, + "loss": 0.1714, + "step": 2750 + }, + { + "epoch": 0.72, + "learning_rate": 7.423333333333333e-07, + "loss": 0.1503, + "step": 2775 + }, + { + "epoch": 0.72, + "learning_rate": 7.34e-07, + "loss": 0.131, + "step": 2800 + }, + { + "epoch": 0.73, + "learning_rate": 7.256666666666667e-07, + "loss": 0.118, + "step": 2825 + }, + { + "epoch": 0.74, + "learning_rate": 7.173333333333333e-07, + "loss": 0.1729, + "step": 2850 + }, + { + "epoch": 0.74, + "learning_rate": 7.089999999999999e-07, + "loss": 0.1193, + "step": 2875 + }, + { + "epoch": 0.75, + "learning_rate": 7.006666666666666e-07, + "loss": 0.1723, + "step": 2900 + }, + { + "epoch": 0.75, + "learning_rate": 6.923333333333333e-07, + "loss": 0.1393, + "step": 2925 + }, + { + "epoch": 0.76, + "learning_rate": 6.84e-07, + "loss": 0.1259, + "step": 2950 + }, + { + "epoch": 0.77, + "learning_rate": 6.756666666666666e-07, + "loss": 0.1228, + "step": 2975 + }, + { + "epoch": 0.77, + "learning_rate": 6.673333333333334e-07, + "loss": 0.1794, + "step": 3000 + }, + { + "epoch": 0.77, + "eval_loss": 0.13788852095603943, + "eval_runtime": 198193.9708, + "eval_samples_per_second": 0.047, + "eval_steps_per_second": 0.006, + "eval_wer": 0.09970911441499677, + "step": 3000 + }, + { + "epoch": 0.78, + "learning_rate": 6.59e-07, + "loss": 0.1334, + "step": 3025 + }, + { + "epoch": 0.79, + "learning_rate": 6.506666666666666e-07, + "loss": 0.1584, + "step": 3050 + }, + { + "epoch": 0.79, + "learning_rate": 6.423333333333333e-07, + "loss": 0.1331, + "step": 3075 + }, + { + "epoch": 0.8, + "learning_rate": 6.346666666666666e-07, + "loss": 0.1226, + "step": 3100 + }, + { + "epoch": 0.81, + "learning_rate": 6.263333333333332e-07, + "loss": 0.135, + "step": 3125 + }, + { + "epoch": 0.81, + "learning_rate": 6.18e-07, + "loss": 0.1538, + "step": 3150 + }, + { + "epoch": 0.82, + "learning_rate": 6.096666666666667e-07, + "loss": 0.1405, + "step": 3175 + }, + { + "epoch": 0.83, + "learning_rate": 6.013333333333334e-07, + "loss": 0.1534, + "step": 3200 + }, + { + "epoch": 0.83, + "learning_rate": 5.93e-07, + "loss": 0.1628, + "step": 3225 + }, + { + "epoch": 0.84, + "learning_rate": 5.846666666666667e-07, + "loss": 0.2002, + "step": 3250 + }, + { + "epoch": 0.85, + "learning_rate": 5.763333333333333e-07, + "loss": 0.1155, + "step": 3275 + }, + { + "epoch": 0.85, + "learning_rate": 5.679999999999999e-07, + "loss": 0.179, + "step": 3300 + }, + { + "epoch": 0.86, + "learning_rate": 5.596666666666666e-07, + "loss": 0.1471, + "step": 3325 + }, + { + "epoch": 0.86, + "learning_rate": 5.513333333333333e-07, + "loss": 0.1386, + "step": 3350 + }, + { + "epoch": 0.87, + "learning_rate": 5.43e-07, + "loss": 0.1185, + "step": 3375 + }, + { + "epoch": 0.88, + "learning_rate": 5.346666666666666e-07, + "loss": 0.1418, + "step": 3400 + }, + { + "epoch": 0.88, + "learning_rate": 5.263333333333333e-07, + "loss": 0.1015, + "step": 3425 + }, + { + "epoch": 0.89, + "learning_rate": 5.18e-07, + "loss": 0.1525, + "step": 3450 + }, + { + "epoch": 0.9, + "learning_rate": 5.096666666666667e-07, + "loss": 0.1212, + "step": 3475 + }, + { + "epoch": 0.9, + "learning_rate": 5.013333333333333e-07, + "loss": 0.1623, + "step": 3500 + }, + { + "epoch": 0.91, + "learning_rate": 4.93e-07, + "loss": 0.1178, + "step": 3525 + }, + { + "epoch": 0.92, + "learning_rate": 4.846666666666667e-07, + "loss": 0.1618, + "step": 3550 + }, + { + "epoch": 0.92, + "learning_rate": 4.763333333333333e-07, + "loss": 0.1187, + "step": 3575 + }, + { + "epoch": 0.93, + "learning_rate": 4.68e-07, + "loss": 0.1382, + "step": 3600 + }, + { + "epoch": 0.94, + "learning_rate": 4.5966666666666667e-07, + "loss": 0.1211, + "step": 3625 + }, + { + "epoch": 0.94, + "learning_rate": 4.5133333333333327e-07, + "loss": 0.1582, + "step": 3650 + }, + { + "epoch": 0.95, + "learning_rate": 4.43e-07, + "loss": 0.1201, + "step": 3675 + }, + { + "epoch": 0.95, + "learning_rate": 4.3466666666666664e-07, + "loss": 0.1571, + "step": 3700 + }, + { + "epoch": 0.96, + "learning_rate": 4.263333333333333e-07, + "loss": 0.1247, + "step": 3725 + }, + { + "epoch": 0.97, + "learning_rate": 4.1799999999999996e-07, + "loss": 0.1648, + "step": 3750 + }, + { + "epoch": 0.97, + "learning_rate": 4.0966666666666667e-07, + "loss": 0.1313, + "step": 3775 + }, + { + "epoch": 0.98, + "learning_rate": 4.0133333333333333e-07, + "loss": 0.1528, + "step": 3800 + }, + { + "epoch": 0.99, + "learning_rate": 3.93e-07, + "loss": 0.1252, + "step": 3825 + }, + { + "epoch": 0.99, + "learning_rate": 3.8466666666666664e-07, + "loss": 0.1674, + "step": 3850 + }, + { + "epoch": 1.0, + "learning_rate": 3.7633333333333335e-07, + "loss": 0.1192, + "step": 3875 + }, + { + "epoch": 1.01, + "learning_rate": 3.6799999999999996e-07, + "loss": 0.1054, + "step": 3900 + }, + { + "epoch": 1.01, + "learning_rate": 3.5966666666666667e-07, + "loss": 0.1353, + "step": 3925 + }, + { + "epoch": 1.02, + "learning_rate": 3.5133333333333333e-07, + "loss": 0.1004, + "step": 3950 + }, + { + "epoch": 1.03, + "learning_rate": 3.43e-07, + "loss": 0.1382, + "step": 3975 + }, + { + "epoch": 1.03, + "learning_rate": 3.3466666666666665e-07, + "loss": 0.0821, + "step": 4000 + }, + { + "epoch": 1.03, + "eval_loss": 0.13208560645580292, + "eval_runtime": 194476.4747, + "eval_samples_per_second": 0.048, + "eval_steps_per_second": 0.006, + "eval_wer": 1.000743374272786, + "step": 4000 + } + ], + "logging_steps": 25, + "max_steps": 5000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 1000, + "total_flos": 1.0871315081330688e+20, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-4000/training_args.bin b/checkpoint-4000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..93e69a467867b9dbff737d816a19f85cc7227a5e Binary files /dev/null and b/checkpoint-4000/training_args.bin differ diff --git a/checkpoint-5000/config.json b/checkpoint-5000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..caa8aea5ccb9a4c479137c1f0ded8dc3bcc33fb2 --- /dev/null +++ b/checkpoint-5000/config.json @@ -0,0 +1,51 @@ +{ + "_name_or_path": "openai/whisper-large-v3", + "activation_dropout": 0.0, + "activation_function": "gelu", + "apply_spec_augment": true, + "architectures": [ + "WhisperForConditionalGeneration" + ], + "attention_dropout": 0.0, + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "classifier_proj_size": 256, + "d_model": 1280, + "decoder_attention_heads": 20, + "decoder_ffn_dim": 5120, + "decoder_layerdrop": 0.0, + "decoder_layers": 32, + "decoder_start_token_id": 50258, + "dropout": 0.0, + "encoder_attention_heads": 20, + "encoder_ffn_dim": 5120, + "encoder_layerdrop": 0.0, + "encoder_layers": 32, + "eos_token_id": 50257, + "forced_decoder_ids": null, + "init_std": 0.02, + "is_encoder_decoder": true, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "max_length": 448, + "max_source_positions": 1500, + "max_target_positions": 448, + "median_filter_width": 7, + "model_type": "whisper", + "num_hidden_layers": 32, + "num_mel_bins": 128, + "pad_token_id": 50256, + "scale_embedding": false, + "torch_dtype": "float32", + "transformers_version": "4.37.0.dev0", + "use_cache": true, + "use_weighted_layer_sum": false, + "vocab_size": 51866 +} diff --git a/checkpoint-5000/generation_config.json b/checkpoint-5000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..cea34a613684b7726186da2813a3ecb580705460 --- /dev/null +++ b/checkpoint-5000/generation_config.json @@ -0,0 +1,265 @@ +{ + "alignment_heads": [ + [ + 7, + 0 + ], + [ + 10, + 17 + ], + [ + 12, + 18 + ], + [ + 13, + 12 + ], + [ + 16, + 1 + ], + [ + 17, + 14 + ], + [ + 19, + 11 + ], + [ + 21, + 4 + ], + [ + 24, + 1 + ], + [ + 25, + 6 + ] + ], + "begin_suppress_tokens": [ + 220, + 50257 + ], + "bos_token_id": 50257, + "decoder_start_token_id": 50258, + "eos_token_id": 50257, + "forced_decoder_ids": [ + [ + 1, + null + ], + [ + 2, + 50359 + ] + ], + "is_multilingual": true, + "lang_to_id": { + "<|af|>": 50327, + "<|am|>": 50334, + "<|ar|>": 50272, + "<|as|>": 50350, + "<|az|>": 50304, + "<|ba|>": 50355, + "<|be|>": 50330, + "<|bg|>": 50292, + "<|bn|>": 50302, + "<|bo|>": 50347, + "<|br|>": 50309, + "<|bs|>": 50315, + "<|ca|>": 50270, + "<|cs|>": 50283, + "<|cy|>": 50297, + "<|da|>": 50285, + "<|de|>": 50261, + "<|el|>": 50281, + "<|en|>": 50259, + "<|es|>": 50262, + "<|et|>": 50307, + "<|eu|>": 50310, + "<|fa|>": 50300, + "<|fi|>": 50277, + "<|fo|>": 50338, + "<|fr|>": 50265, + "<|gl|>": 50319, + "<|gu|>": 50333, + "<|haw|>": 50352, + "<|ha|>": 50354, + "<|he|>": 50279, + "<|hi|>": 50276, + "<|hr|>": 50291, + "<|ht|>": 50339, + "<|hu|>": 50286, + "<|hy|>": 50312, + "<|id|>": 50275, + "<|is|>": 50311, + "<|it|>": 50274, + "<|ja|>": 50266, + "<|jw|>": 50356, + "<|ka|>": 50329, + "<|kk|>": 50316, + "<|km|>": 50323, + "<|kn|>": 50306, + "<|ko|>": 50264, + "<|la|>": 50294, + "<|lb|>": 50345, + "<|ln|>": 50353, + "<|lo|>": 50336, + "<|lt|>": 50293, + "<|lv|>": 50301, + "<|mg|>": 50349, + "<|mi|>": 50295, + "<|mk|>": 50308, + "<|ml|>": 50296, + "<|mn|>": 50314, + "<|mr|>": 50320, + "<|ms|>": 50282, + "<|mt|>": 50343, + "<|my|>": 50346, + "<|ne|>": 50313, + "<|nl|>": 50271, + "<|nn|>": 50342, + "<|no|>": 50288, + "<|oc|>": 50328, + "<|pa|>": 50321, + "<|pl|>": 50269, + "<|ps|>": 50340, + "<|pt|>": 50267, + "<|ro|>": 50284, + "<|ru|>": 50263, + "<|sa|>": 50344, + "<|sd|>": 50332, + "<|si|>": 50322, + "<|sk|>": 50298, + "<|sl|>": 50305, + "<|sn|>": 50324, + "<|so|>": 50326, + "<|sq|>": 50317, + "<|sr|>": 50303, + "<|su|>": 50357, + "<|sv|>": 50273, + "<|sw|>": 50318, + "<|ta|>": 50287, + "<|te|>": 50299, + "<|tg|>": 50331, + "<|th|>": 50289, + "<|tk|>": 50341, + "<|tl|>": 50348, + "<|tr|>": 50268, + "<|tt|>": 50351, + "<|uk|>": 50280, + "<|ur|>": 50290, + "<|uz|>": 50337, + "<|vi|>": 50278, + "<|yi|>": 50335, + "<|yo|>": 50325, + "<|yue|>": 50358, + "<|zh|>": 50260 + }, + "max_initial_timestamp_index": 50, + "max_length": 448, + "no_timestamps_token_id": 50364, + "pad_token_id": 50257, + "prev_sot_token_id": 50362, + "return_timestamps": false, + "suppress_tokens": [ + 1, + 2, + 7, + 8, + 9, + 10, + 14, + 25, + 26, + 27, + 28, + 29, + 31, + 58, + 59, + 60, + 61, + 62, + 63, + 90, + 91, + 92, + 93, + 359, + 503, + 522, + 542, + 873, + 893, + 902, + 918, + 922, + 931, + 1350, + 1853, + 1982, + 2460, + 2627, + 3246, + 3253, + 3268, + 3536, + 3846, + 3961, + 4183, + 4667, + 6585, + 6647, + 7273, + 9061, + 9383, + 10428, + 10929, + 11938, + 12033, + 12331, + 12562, + 13793, + 14157, + 14635, + 15265, + 15618, + 16553, + 16604, + 18362, + 18956, + 20075, + 21675, + 22520, + 26130, + 26161, + 26435, + 28279, + 29464, + 31650, + 32302, + 32470, + 36865, + 42863, + 47425, + 49870, + 50254, + 50258, + 50359, + 50360, + 50361, + 50362, + 50363 + ], + "task_to_id": { + "transcribe": 50360, + "translate": 50359 + }, + "transformers_version": "4.37.0.dev0" +} diff --git a/checkpoint-5000/model-00001-of-00002.safetensors b/checkpoint-5000/model-00001-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0a2dd64e5f0a7cc597d6b5de6514e7ea2d91dad6 --- /dev/null +++ b/checkpoint-5000/model-00001-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:162e92f863126b6badbad6104326cd65b2875f74bb0340e9b4d53e3d7cc4e83b +size 4993448880 diff --git a/checkpoint-5000/model-00002-of-00002.safetensors b/checkpoint-5000/model-00002-of-00002.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e583ef14471da6cdeca6620c93777120d9f7bfd3 --- /dev/null +++ b/checkpoint-5000/model-00002-of-00002.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a3553cfea4d55a22ddc4554e797d5f41b8d4ec64eff37633af911cf87725f80 +size 1180663192 diff --git a/checkpoint-5000/model.safetensors.index.json b/checkpoint-5000/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..c40d652c9b765dc6b1f8a90b16063b93b7b5c888 --- /dev/null +++ b/checkpoint-5000/model.safetensors.index.json @@ -0,0 +1,1266 @@ +{ + "metadata": { + "total_size": 6173962240 + }, + "weight_map": { + "model.decoder.embed_positions.weight": "model-00001-of-00002.safetensors", + "model.decoder.embed_tokens.weight": "model-00001-of-00002.safetensors", + "model.decoder.layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.0.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.1.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.10.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.11.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.12.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.13.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.14.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.16.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.17.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.19.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.21.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.22.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.23.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.24.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.25.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.26.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.27.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.28.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.29.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.3.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.30.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.encoder_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc1.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc1.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc2.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.fc2.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.final_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.final_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.out_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.out_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.q_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.v_proj.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn_layer_norm.bias": "model-00002-of-00002.safetensors", + "model.decoder.layers.31.self_attn_layer_norm.weight": "model-00002-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.4.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.5.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.7.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.8.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.encoder_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc1.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc1.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc2.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.fc2.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.decoder.layers.9.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.conv1.bias": "model-00001-of-00002.safetensors", + "model.encoder.conv1.weight": "model-00001-of-00002.safetensors", + "model.encoder.conv2.bias": "model-00001-of-00002.safetensors", + "model.encoder.conv2.weight": "model-00001-of-00002.safetensors", + "model.encoder.embed_positions.weight": "model-00001-of-00002.safetensors", + "model.encoder.layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.0.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.1.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.10.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.11.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.12.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.13.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.14.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.16.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.17.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.19.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.21.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.22.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.23.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.24.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.25.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.26.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.27.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.28.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.29.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.3.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.30.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.31.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.4.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.5.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.7.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.8.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc1.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc1.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc2.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.fc2.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.final_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.final_layer_norm.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.out_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.out_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn_layer_norm.bias": "model-00001-of-00002.safetensors", + "model.encoder.layers.9.self_attn_layer_norm.weight": "model-00001-of-00002.safetensors" + } +} diff --git a/checkpoint-5000/optimizer.pt b/checkpoint-5000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..290099019ace04ffd23954be906dbdb5762d8863 --- /dev/null +++ b/checkpoint-5000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5f7fa7b096895a5ccb1902a0d8fec0c742bdca05c537aff90ca373cebe38514 +size 12333660476 diff --git a/checkpoint-5000/preprocessor_config.json b/checkpoint-5000/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..931c77a740890c46365c7ae0c9d350ba3cca908f --- /dev/null +++ b/checkpoint-5000/preprocessor_config.json @@ -0,0 +1,14 @@ +{ + "chunk_length": 30, + "feature_extractor_type": "WhisperFeatureExtractor", + "feature_size": 128, + "hop_length": 160, + "n_fft": 400, + "n_samples": 480000, + "nb_max_frames": 3000, + "padding_side": "right", + "padding_value": 0.0, + "processor_class": "WhisperProcessor", + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/checkpoint-5000/rng_state.pth b/checkpoint-5000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..4f426eba0dc28e8fe02dc8a2bd18cc31e1922fd3 Binary files /dev/null and b/checkpoint-5000/rng_state.pth differ diff --git a/checkpoint-5000/scheduler.pt b/checkpoint-5000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4aa391619e624e3b72edd58336913651ef08416d Binary files /dev/null and b/checkpoint-5000/scheduler.pt differ diff --git a/checkpoint-5000/trainer_state.json b/checkpoint-5000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4e826aad502d0c7403ef5d848a35d469090e9e19 --- /dev/null +++ b/checkpoint-5000/trainer_state.json @@ -0,0 +1,1266 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.2903225806451613, + "eval_steps": 1000, + "global_step": 5000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 1.1499999999999999e-08, + "loss": 0.7822, + "step": 25 + }, + { + "epoch": 0.01, + "learning_rate": 2.4e-08, + "loss": 1.3149, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 3.6499999999999996e-08, + "loss": 0.7809, + "step": 75 + }, + { + "epoch": 0.03, + "learning_rate": 4.9e-08, + "loss": 1.2916, + "step": 100 + }, + { + "epoch": 0.03, + "learning_rate": 6.15e-08, + "loss": 0.7645, + "step": 125 + }, + { + "epoch": 0.04, + "learning_rate": 7.399999999999999e-08, + "loss": 1.3379, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 8.649999999999999e-08, + "loss": 0.7722, + "step": 175 + }, + { + "epoch": 0.05, + "learning_rate": 9.9e-08, + "loss": 1.281, + "step": 200 + }, + { + "epoch": 0.06, + "learning_rate": 1.115e-07, + "loss": 0.6998, + "step": 225 + }, + { + "epoch": 0.06, + "learning_rate": 1.24e-07, + "loss": 1.1532, + "step": 250 + }, + { + "epoch": 0.07, + "learning_rate": 1.365e-07, + "loss": 0.583, + "step": 275 + }, + { + "epoch": 0.08, + "learning_rate": 1.49e-07, + "loss": 0.6693, + "step": 300 + }, + { + "epoch": 0.08, + "learning_rate": 1.615e-07, + "loss": 0.3395, + "step": 325 + }, + { + "epoch": 0.09, + "learning_rate": 1.7399999999999997e-07, + "loss": 0.3597, + "step": 350 + }, + { + "epoch": 0.1, + "learning_rate": 1.8649999999999998e-07, + "loss": 0.22, + "step": 375 + }, + { + "epoch": 0.1, + "learning_rate": 1.99e-07, + "loss": 0.3449, + "step": 400 + }, + { + "epoch": 0.11, + "learning_rate": 2.1149999999999998e-07, + "loss": 0.2123, + "step": 425 + }, + { + "epoch": 0.12, + "learning_rate": 2.24e-07, + "loss": 0.3362, + "step": 450 + }, + { + "epoch": 0.12, + "learning_rate": 2.3649999999999998e-07, + "loss": 0.175, + "step": 475 + }, + { + "epoch": 0.13, + "learning_rate": 2.4899999999999997e-07, + "loss": 0.3051, + "step": 500 + }, + { + "epoch": 0.14, + "learning_rate": 2.615e-07, + "loss": 0.176, + "step": 525 + }, + { + "epoch": 0.14, + "learning_rate": 2.74e-07, + "loss": 0.2517, + "step": 550 + }, + { + "epoch": 0.15, + "learning_rate": 2.865e-07, + "loss": 0.1863, + "step": 575 + }, + { + "epoch": 0.15, + "learning_rate": 2.9899999999999996e-07, + "loss": 0.2614, + "step": 600 + }, + { + "epoch": 0.16, + "learning_rate": 3.115e-07, + "loss": 0.1942, + "step": 625 + }, + { + "epoch": 0.17, + "learning_rate": 3.24e-07, + "loss": 0.2901, + "step": 650 + }, + { + "epoch": 0.17, + "learning_rate": 3.3650000000000003e-07, + "loss": 0.1587, + "step": 675 + }, + { + "epoch": 0.18, + "learning_rate": 3.4899999999999996e-07, + "loss": 0.2128, + "step": 700 + }, + { + "epoch": 0.19, + "learning_rate": 3.6149999999999995e-07, + "loss": 0.1647, + "step": 725 + }, + { + "epoch": 0.19, + "learning_rate": 3.74e-07, + "loss": 0.2473, + "step": 750 + }, + { + "epoch": 0.2, + "learning_rate": 3.8649999999999997e-07, + "loss": 0.1415, + "step": 775 + }, + { + "epoch": 0.21, + "learning_rate": 3.99e-07, + "loss": 0.2367, + "step": 800 + }, + { + "epoch": 0.21, + "learning_rate": 4.1149999999999995e-07, + "loss": 0.1498, + "step": 825 + }, + { + "epoch": 0.22, + "learning_rate": 4.24e-07, + "loss": 0.2161, + "step": 850 + }, + { + "epoch": 0.23, + "learning_rate": 4.3649999999999997e-07, + "loss": 0.144, + "step": 875 + }, + { + "epoch": 0.23, + "learning_rate": 4.49e-07, + "loss": 0.2316, + "step": 900 + }, + { + "epoch": 0.24, + "learning_rate": 4.615e-07, + "loss": 0.1449, + "step": 925 + }, + { + "epoch": 0.25, + "learning_rate": 4.7399999999999993e-07, + "loss": 0.1513, + "step": 950 + }, + { + "epoch": 0.25, + "learning_rate": 4.864999999999999e-07, + "loss": 0.1855, + "step": 975 + }, + { + "epoch": 0.26, + "learning_rate": 4.99e-07, + "loss": 0.199, + "step": 1000 + }, + { + "epoch": 0.26, + "eval_loss": 0.15628743171691895, + "eval_runtime": 184706.1006, + "eval_samples_per_second": 0.051, + "eval_steps_per_second": 0.006, + "eval_wer": 0.1124272786037492, + "step": 1000 + }, + { + "epoch": 0.26, + "learning_rate": 5.114999999999999e-07, + "loss": 0.1301, + "step": 1025 + }, + { + "epoch": 0.27, + "learning_rate": 5.24e-07, + "loss": 0.2193, + "step": 1050 + }, + { + "epoch": 0.28, + "learning_rate": 5.365e-07, + "loss": 0.1552, + "step": 1075 + }, + { + "epoch": 0.28, + "learning_rate": 5.490000000000001e-07, + "loss": 0.1865, + "step": 1100 + }, + { + "epoch": 0.29, + "learning_rate": 5.614999999999999e-07, + "loss": 0.1618, + "step": 1125 + }, + { + "epoch": 0.3, + "learning_rate": 5.739999999999999e-07, + "loss": 0.2259, + "step": 1150 + }, + { + "epoch": 0.3, + "learning_rate": 5.865e-07, + "loss": 0.1418, + "step": 1175 + }, + { + "epoch": 0.31, + "learning_rate": 5.989999999999999e-07, + "loss": 0.1916, + "step": 1200 + }, + { + "epoch": 0.32, + "learning_rate": 6.115e-07, + "loss": 0.1295, + "step": 1225 + }, + { + "epoch": 0.32, + "learning_rate": 6.24e-07, + "loss": 0.2112, + "step": 1250 + }, + { + "epoch": 0.33, + "learning_rate": 6.365e-07, + "loss": 0.1231, + "step": 1275 + }, + { + "epoch": 0.34, + "learning_rate": 6.49e-07, + "loss": 0.1914, + "step": 1300 + }, + { + "epoch": 0.34, + "learning_rate": 6.614999999999999e-07, + "loss": 0.1485, + "step": 1325 + }, + { + "epoch": 0.35, + "learning_rate": 6.74e-07, + "loss": 0.1958, + "step": 1350 + }, + { + "epoch": 0.35, + "learning_rate": 6.865e-07, + "loss": 0.1452, + "step": 1375 + }, + { + "epoch": 0.36, + "learning_rate": 6.989999999999999e-07, + "loss": 0.1624, + "step": 1400 + }, + { + "epoch": 0.37, + "learning_rate": 7.115e-07, + "loss": 0.1518, + "step": 1425 + }, + { + "epoch": 0.37, + "learning_rate": 7.24e-07, + "loss": 0.1935, + "step": 1450 + }, + { + "epoch": 0.38, + "learning_rate": 7.365e-07, + "loss": 0.1138, + "step": 1475 + }, + { + "epoch": 0.39, + "learning_rate": 7.489999999999999e-07, + "loss": 0.16, + "step": 1500 + }, + { + "epoch": 0.39, + "learning_rate": 7.614999999999999e-07, + "loss": 0.1279, + "step": 1525 + }, + { + "epoch": 0.4, + "learning_rate": 7.74e-07, + "loss": 0.1862, + "step": 1550 + }, + { + "epoch": 0.41, + "learning_rate": 7.864999999999999e-07, + "loss": 0.1537, + "step": 1575 + }, + { + "epoch": 0.41, + "learning_rate": 7.99e-07, + "loss": 0.1915, + "step": 1600 + }, + { + "epoch": 0.42, + "learning_rate": 8.115e-07, + "loss": 0.1303, + "step": 1625 + }, + { + "epoch": 0.43, + "learning_rate": 8.24e-07, + "loss": 0.1967, + "step": 1650 + }, + { + "epoch": 0.43, + "learning_rate": 8.365e-07, + "loss": 0.1418, + "step": 1675 + }, + { + "epoch": 0.44, + "learning_rate": 8.489999999999999e-07, + "loss": 0.194, + "step": 1700 + }, + { + "epoch": 0.45, + "learning_rate": 8.615e-07, + "loss": 0.1377, + "step": 1725 + }, + { + "epoch": 0.45, + "learning_rate": 8.739999999999999e-07, + "loss": 0.1534, + "step": 1750 + }, + { + "epoch": 0.46, + "learning_rate": 8.864999999999999e-07, + "loss": 0.1416, + "step": 1775 + }, + { + "epoch": 0.46, + "learning_rate": 8.99e-07, + "loss": 0.1869, + "step": 1800 + }, + { + "epoch": 0.47, + "learning_rate": 9.115e-07, + "loss": 0.1507, + "step": 1825 + }, + { + "epoch": 0.48, + "learning_rate": 9.24e-07, + "loss": 0.1571, + "step": 1850 + }, + { + "epoch": 0.48, + "learning_rate": 9.365e-07, + "loss": 0.1348, + "step": 1875 + }, + { + "epoch": 0.49, + "learning_rate": 9.489999999999999e-07, + "loss": 0.1981, + "step": 1900 + }, + { + "epoch": 0.5, + "learning_rate": 9.615e-07, + "loss": 0.1324, + "step": 1925 + }, + { + "epoch": 0.5, + "learning_rate": 9.74e-07, + "loss": 0.1712, + "step": 1950 + }, + { + "epoch": 0.51, + "learning_rate": 9.865e-07, + "loss": 0.1169, + "step": 1975 + }, + { + "epoch": 0.52, + "learning_rate": 9.989999999999999e-07, + "loss": 0.1654, + "step": 2000 + }, + { + "epoch": 0.52, + "eval_loss": 0.1500108540058136, + "eval_runtime": 188234.7285, + "eval_samples_per_second": 0.05, + "eval_steps_per_second": 0.006, + "eval_wer": 0.10521978021978022, + "step": 2000 + }, + { + "epoch": 0.52, + "learning_rate": 9.923333333333332e-07, + "loss": 0.1202, + "step": 2025 + }, + { + "epoch": 0.53, + "learning_rate": 9.84e-07, + "loss": 0.186, + "step": 2050 + }, + { + "epoch": 0.54, + "learning_rate": 9.756666666666666e-07, + "loss": 0.1106, + "step": 2075 + }, + { + "epoch": 0.54, + "learning_rate": 9.673333333333332e-07, + "loss": 0.1926, + "step": 2100 + }, + { + "epoch": 0.55, + "learning_rate": 9.589999999999998e-07, + "loss": 0.1178, + "step": 2125 + }, + { + "epoch": 0.55, + "learning_rate": 9.506666666666667e-07, + "loss": 0.1744, + "step": 2150 + }, + { + "epoch": 0.56, + "learning_rate": 9.423333333333333e-07, + "loss": 0.1263, + "step": 2175 + }, + { + "epoch": 0.57, + "learning_rate": 9.34e-07, + "loss": 0.1818, + "step": 2200 + }, + { + "epoch": 0.57, + "learning_rate": 9.256666666666666e-07, + "loss": 0.1466, + "step": 2225 + }, + { + "epoch": 0.58, + "learning_rate": 9.173333333333333e-07, + "loss": 0.1357, + "step": 2250 + }, + { + "epoch": 0.59, + "learning_rate": 9.09e-07, + "loss": 0.148, + "step": 2275 + }, + { + "epoch": 0.59, + "learning_rate": 9.006666666666666e-07, + "loss": 0.1684, + "step": 2300 + }, + { + "epoch": 0.6, + "learning_rate": 8.923333333333333e-07, + "loss": 0.1368, + "step": 2325 + }, + { + "epoch": 0.61, + "learning_rate": 8.839999999999999e-07, + "loss": 0.188, + "step": 2350 + }, + { + "epoch": 0.61, + "learning_rate": 8.756666666666666e-07, + "loss": 0.1299, + "step": 2375 + }, + { + "epoch": 0.62, + "learning_rate": 8.673333333333332e-07, + "loss": 0.1461, + "step": 2400 + }, + { + "epoch": 0.63, + "learning_rate": 8.59e-07, + "loss": 0.1569, + "step": 2425 + }, + { + "epoch": 0.63, + "learning_rate": 8.506666666666667e-07, + "loss": 0.1527, + "step": 2450 + }, + { + "epoch": 0.64, + "learning_rate": 8.423333333333334e-07, + "loss": 0.1041, + "step": 2475 + }, + { + "epoch": 0.65, + "learning_rate": 8.34e-07, + "loss": 0.157, + "step": 2500 + }, + { + "epoch": 0.65, + "learning_rate": 8.256666666666666e-07, + "loss": 0.1364, + "step": 2525 + }, + { + "epoch": 0.66, + "learning_rate": 8.173333333333333e-07, + "loss": 0.1685, + "step": 2550 + }, + { + "epoch": 0.66, + "learning_rate": 8.09e-07, + "loss": 0.1344, + "step": 2575 + }, + { + "epoch": 0.67, + "learning_rate": 8.006666666666666e-07, + "loss": 0.1589, + "step": 2600 + }, + { + "epoch": 0.68, + "learning_rate": 7.923333333333333e-07, + "loss": 0.1334, + "step": 2625 + }, + { + "epoch": 0.68, + "learning_rate": 7.84e-07, + "loss": 0.1297, + "step": 2650 + }, + { + "epoch": 0.69, + "learning_rate": 7.756666666666665e-07, + "loss": 0.1326, + "step": 2675 + }, + { + "epoch": 0.7, + "learning_rate": 7.673333333333332e-07, + "loss": 0.179, + "step": 2700 + }, + { + "epoch": 0.7, + "learning_rate": 7.59e-07, + "loss": 0.1242, + "step": 2725 + }, + { + "epoch": 0.71, + "learning_rate": 7.506666666666667e-07, + "loss": 0.1714, + "step": 2750 + }, + { + "epoch": 0.72, + "learning_rate": 7.423333333333333e-07, + "loss": 0.1503, + "step": 2775 + }, + { + "epoch": 0.72, + "learning_rate": 7.34e-07, + "loss": 0.131, + "step": 2800 + }, + { + "epoch": 0.73, + "learning_rate": 7.256666666666667e-07, + "loss": 0.118, + "step": 2825 + }, + { + "epoch": 0.74, + "learning_rate": 7.173333333333333e-07, + "loss": 0.1729, + "step": 2850 + }, + { + "epoch": 0.74, + "learning_rate": 7.089999999999999e-07, + "loss": 0.1193, + "step": 2875 + }, + { + "epoch": 0.75, + "learning_rate": 7.006666666666666e-07, + "loss": 0.1723, + "step": 2900 + }, + { + "epoch": 0.75, + "learning_rate": 6.923333333333333e-07, + "loss": 0.1393, + "step": 2925 + }, + { + "epoch": 0.76, + "learning_rate": 6.84e-07, + "loss": 0.1259, + "step": 2950 + }, + { + "epoch": 0.77, + "learning_rate": 6.756666666666666e-07, + "loss": 0.1228, + "step": 2975 + }, + { + "epoch": 0.77, + "learning_rate": 6.673333333333334e-07, + "loss": 0.1794, + "step": 3000 + }, + { + "epoch": 0.77, + "eval_loss": 0.13788852095603943, + "eval_runtime": 198193.9708, + "eval_samples_per_second": 0.047, + "eval_steps_per_second": 0.006, + "eval_wer": 0.09970911441499677, + "step": 3000 + }, + { + "epoch": 0.78, + "learning_rate": 6.59e-07, + "loss": 0.1334, + "step": 3025 + }, + { + "epoch": 0.79, + "learning_rate": 6.506666666666666e-07, + "loss": 0.1584, + "step": 3050 + }, + { + "epoch": 0.79, + "learning_rate": 6.423333333333333e-07, + "loss": 0.1331, + "step": 3075 + }, + { + "epoch": 0.8, + "learning_rate": 6.346666666666666e-07, + "loss": 0.1226, + "step": 3100 + }, + { + "epoch": 0.81, + "learning_rate": 6.263333333333332e-07, + "loss": 0.135, + "step": 3125 + }, + { + "epoch": 0.81, + "learning_rate": 6.18e-07, + "loss": 0.1538, + "step": 3150 + }, + { + "epoch": 0.82, + "learning_rate": 6.096666666666667e-07, + "loss": 0.1405, + "step": 3175 + }, + { + "epoch": 0.83, + "learning_rate": 6.013333333333334e-07, + "loss": 0.1534, + "step": 3200 + }, + { + "epoch": 0.83, + "learning_rate": 5.93e-07, + "loss": 0.1628, + "step": 3225 + }, + { + "epoch": 0.84, + "learning_rate": 5.846666666666667e-07, + "loss": 0.2002, + "step": 3250 + }, + { + "epoch": 0.85, + "learning_rate": 5.763333333333333e-07, + "loss": 0.1155, + "step": 3275 + }, + { + "epoch": 0.85, + "learning_rate": 5.679999999999999e-07, + "loss": 0.179, + "step": 3300 + }, + { + "epoch": 0.86, + "learning_rate": 5.596666666666666e-07, + "loss": 0.1471, + "step": 3325 + }, + { + "epoch": 0.86, + "learning_rate": 5.513333333333333e-07, + "loss": 0.1386, + "step": 3350 + }, + { + "epoch": 0.87, + "learning_rate": 5.43e-07, + "loss": 0.1185, + "step": 3375 + }, + { + "epoch": 0.88, + "learning_rate": 5.346666666666666e-07, + "loss": 0.1418, + "step": 3400 + }, + { + "epoch": 0.88, + "learning_rate": 5.263333333333333e-07, + "loss": 0.1015, + "step": 3425 + }, + { + "epoch": 0.89, + "learning_rate": 5.18e-07, + "loss": 0.1525, + "step": 3450 + }, + { + "epoch": 0.9, + "learning_rate": 5.096666666666667e-07, + "loss": 0.1212, + "step": 3475 + }, + { + "epoch": 0.9, + "learning_rate": 5.013333333333333e-07, + "loss": 0.1623, + "step": 3500 + }, + { + "epoch": 0.91, + "learning_rate": 4.93e-07, + "loss": 0.1178, + "step": 3525 + }, + { + "epoch": 0.92, + "learning_rate": 4.846666666666667e-07, + "loss": 0.1618, + "step": 3550 + }, + { + "epoch": 0.92, + "learning_rate": 4.763333333333333e-07, + "loss": 0.1187, + "step": 3575 + }, + { + "epoch": 0.93, + "learning_rate": 4.68e-07, + "loss": 0.1382, + "step": 3600 + }, + { + "epoch": 0.94, + "learning_rate": 4.5966666666666667e-07, + "loss": 0.1211, + "step": 3625 + }, + { + "epoch": 0.94, + "learning_rate": 4.5133333333333327e-07, + "loss": 0.1582, + "step": 3650 + }, + { + "epoch": 0.95, + "learning_rate": 4.43e-07, + "loss": 0.1201, + "step": 3675 + }, + { + "epoch": 0.95, + "learning_rate": 4.3466666666666664e-07, + "loss": 0.1571, + "step": 3700 + }, + { + "epoch": 0.96, + "learning_rate": 4.263333333333333e-07, + "loss": 0.1247, + "step": 3725 + }, + { + "epoch": 0.97, + "learning_rate": 4.1799999999999996e-07, + "loss": 0.1648, + "step": 3750 + }, + { + "epoch": 0.97, + "learning_rate": 4.0966666666666667e-07, + "loss": 0.1313, + "step": 3775 + }, + { + "epoch": 0.98, + "learning_rate": 4.0133333333333333e-07, + "loss": 0.1528, + "step": 3800 + }, + { + "epoch": 0.99, + "learning_rate": 3.93e-07, + "loss": 0.1252, + "step": 3825 + }, + { + "epoch": 0.99, + "learning_rate": 3.8466666666666664e-07, + "loss": 0.1674, + "step": 3850 + }, + { + "epoch": 1.0, + "learning_rate": 3.7633333333333335e-07, + "loss": 0.1192, + "step": 3875 + }, + { + "epoch": 1.01, + "learning_rate": 3.6799999999999996e-07, + "loss": 0.1054, + "step": 3900 + }, + { + "epoch": 1.01, + "learning_rate": 3.5966666666666667e-07, + "loss": 0.1353, + "step": 3925 + }, + { + "epoch": 1.02, + "learning_rate": 3.5133333333333333e-07, + "loss": 0.1004, + "step": 3950 + }, + { + "epoch": 1.03, + "learning_rate": 3.43e-07, + "loss": 0.1382, + "step": 3975 + }, + { + "epoch": 1.03, + "learning_rate": 3.3466666666666665e-07, + "loss": 0.0821, + "step": 4000 + }, + { + "epoch": 1.03, + "eval_loss": 0.13208560645580292, + "eval_runtime": 194476.4747, + "eval_samples_per_second": 0.048, + "eval_steps_per_second": 0.006, + "eval_wer": 1.000743374272786, + "step": 4000 + }, + { + "epoch": 1.04, + "learning_rate": 3.263333333333333e-07, + "loss": 0.1092, + "step": 4025 + }, + { + "epoch": 1.05, + "learning_rate": 3.18e-07, + "loss": 0.0928, + "step": 4050 + }, + { + "epoch": 1.05, + "learning_rate": 3.096666666666666e-07, + "loss": 0.1481, + "step": 4075 + }, + { + "epoch": 1.06, + "learning_rate": 3.0133333333333333e-07, + "loss": 0.1043, + "step": 4100 + }, + { + "epoch": 1.06, + "learning_rate": 2.93e-07, + "loss": 0.1018, + "step": 4125 + }, + { + "epoch": 1.07, + "learning_rate": 2.8466666666666665e-07, + "loss": 0.104, + "step": 4150 + }, + { + "epoch": 1.08, + "learning_rate": 2.763333333333333e-07, + "loss": 0.1394, + "step": 4175 + }, + { + "epoch": 1.08, + "learning_rate": 2.68e-07, + "loss": 0.0962, + "step": 4200 + }, + { + "epoch": 1.09, + "learning_rate": 2.596666666666666e-07, + "loss": 0.1058, + "step": 4225 + }, + { + "epoch": 1.1, + "learning_rate": 2.5133333333333333e-07, + "loss": 0.0837, + "step": 4250 + }, + { + "epoch": 1.1, + "learning_rate": 2.43e-07, + "loss": 0.1396, + "step": 4275 + }, + { + "epoch": 1.11, + "learning_rate": 2.3466666666666665e-07, + "loss": 0.1005, + "step": 4300 + }, + { + "epoch": 1.12, + "learning_rate": 2.263333333333333e-07, + "loss": 0.131, + "step": 4325 + }, + { + "epoch": 1.12, + "learning_rate": 2.18e-07, + "loss": 0.1069, + "step": 4350 + }, + { + "epoch": 1.13, + "learning_rate": 2.0966666666666665e-07, + "loss": 0.136, + "step": 4375 + }, + { + "epoch": 1.14, + "learning_rate": 2.0133333333333334e-07, + "loss": 0.0954, + "step": 4400 + }, + { + "epoch": 1.14, + "learning_rate": 1.93e-07, + "loss": 0.1276, + "step": 4425 + }, + { + "epoch": 1.15, + "learning_rate": 1.8466666666666665e-07, + "loss": 0.1033, + "step": 4450 + }, + { + "epoch": 1.15, + "learning_rate": 1.7633333333333334e-07, + "loss": 0.1547, + "step": 4475 + }, + { + "epoch": 1.16, + "learning_rate": 1.68e-07, + "loss": 0.1093, + "step": 4500 + }, + { + "epoch": 1.17, + "learning_rate": 1.5966666666666668e-07, + "loss": 0.1435, + "step": 4525 + }, + { + "epoch": 1.17, + "learning_rate": 1.513333333333333e-07, + "loss": 0.1085, + "step": 4550 + }, + { + "epoch": 1.18, + "learning_rate": 1.4299999999999997e-07, + "loss": 0.1105, + "step": 4575 + }, + { + "epoch": 1.19, + "learning_rate": 1.3466666666666665e-07, + "loss": 0.0858, + "step": 4600 + }, + { + "epoch": 1.19, + "learning_rate": 1.263333333333333e-07, + "loss": 0.1336, + "step": 4625 + }, + { + "epoch": 1.2, + "learning_rate": 1.1799999999999998e-07, + "loss": 0.0857, + "step": 4650 + }, + { + "epoch": 1.21, + "learning_rate": 1.0966666666666666e-07, + "loss": 0.1146, + "step": 4675 + }, + { + "epoch": 1.21, + "learning_rate": 1.0133333333333333e-07, + "loss": 0.097, + "step": 4700 + }, + { + "epoch": 1.22, + "learning_rate": 9.3e-08, + "loss": 0.1146, + "step": 4725 + }, + { + "epoch": 1.23, + "learning_rate": 8.466666666666667e-08, + "loss": 0.0932, + "step": 4750 + }, + { + "epoch": 1.23, + "learning_rate": 7.633333333333333e-08, + "loss": 0.1245, + "step": 4775 + }, + { + "epoch": 1.24, + "learning_rate": 6.8e-08, + "loss": 0.0978, + "step": 4800 + }, + { + "epoch": 1.25, + "learning_rate": 5.966666666666666e-08, + "loss": 0.1283, + "step": 4825 + }, + { + "epoch": 1.25, + "learning_rate": 5.133333333333333e-08, + "loss": 0.1091, + "step": 4850 + }, + { + "epoch": 1.26, + "learning_rate": 4.2999999999999995e-08, + "loss": 0.1106, + "step": 4875 + }, + { + "epoch": 1.26, + "learning_rate": 3.4666666666666666e-08, + "loss": 0.1131, + "step": 4900 + }, + { + "epoch": 1.27, + "learning_rate": 2.633333333333333e-08, + "loss": 0.1219, + "step": 4925 + }, + { + "epoch": 1.28, + "learning_rate": 1.8e-08, + "loss": 0.1205, + "step": 4950 + }, + { + "epoch": 1.28, + "learning_rate": 9.666666666666667e-09, + "loss": 0.1408, + "step": 4975 + }, + { + "epoch": 1.29, + "learning_rate": 1.3333333333333333e-09, + "loss": 0.1292, + "step": 5000 + }, + { + "epoch": 1.29, + "eval_loss": 0.132488414645195, + "eval_runtime": 160662.2084, + "eval_samples_per_second": 0.059, + "eval_steps_per_second": 0.007, + "eval_wer": 0.9998545572074984, + "step": 5000 + } + ], + "logging_steps": 25, + "max_steps": 5000, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 1000, + "total_flos": 1.3589313726578688e+20, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-5000/training_args.bin b/checkpoint-5000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..93e69a467867b9dbff737d816a19f85cc7227a5e Binary files /dev/null and b/checkpoint-5000/training_args.bin differ diff --git a/eval_results.json b/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..79aa0add2af4ec8813b08411562b98eaabce78be --- /dev/null +++ b/eval_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 1.29, + "eval_loss": 0.132488414645195, + "eval_runtime": 171706.1904, + "eval_samples": 9399, + "eval_samples_per_second": 0.055, + "eval_steps_per_second": 0.007, + "eval_wer": 0.9998545572074984 +} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json index 9c184c23d34e10a8eb965f82773d9f8e3fb90c3d..cea34a613684b7726186da2813a3ecb580705460 100644 --- a/generation_config.json +++ b/generation_config.json @@ -55,7 +55,7 @@ ], [ 2, - 50360 + 50359 ] ], "is_multilingual": true, @@ -161,10 +161,11 @@ "<|yue|>": 50358, "<|zh|>": 50260 }, - "max_initial_timestamp_index": 1, + "max_initial_timestamp_index": 50, "max_length": 448, "no_timestamps_token_id": 50364, "pad_token_id": 50257, + "prev_sot_token_id": 50362, "return_timestamps": false, "suppress_tokens": [ 1, diff --git a/model-00001-of-00002.safetensors b/model-00001-of-00002.safetensors index 2cc9c49f461f301f3d100a0243f0f354aa4f6c82..0a2dd64e5f0a7cc597d6b5de6514e7ea2d91dad6 100644 --- a/model-00001-of-00002.safetensors +++ b/model-00001-of-00002.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fedb9cc896a6388bb4f5cddb373b7392782ec965512769af969c60e1af1a4e14 +oid sha256:162e92f863126b6badbad6104326cd65b2875f74bb0340e9b4d53e3d7cc4e83b size 4993448880 diff --git a/model-00002-of-00002.safetensors b/model-00002-of-00002.safetensors index 2f7cf5a93c975accb476a84757532fee9168d3a5..e583ef14471da6cdeca6620c93777120d9f7bfd3 100644 --- a/model-00002-of-00002.safetensors +++ b/model-00002-of-00002.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5c6b0edf383dba5a20a86c58366da587af2b11f57f3238809e22174428275ba2 +oid sha256:0a3553cfea4d55a22ddc4554e797d5f41b8d4ec64eff37633af911cf87725f80 size 1180663192 diff --git a/runs/Feb01_13-09-49_DITEC2014063010/events.out.tfevents.1706804084.DITEC2014063010.17912.0 b/runs/Feb01_13-09-49_DITEC2014063010/events.out.tfevents.1706804084.DITEC2014063010.17912.0 index e899bc8097a46c22c256104695824fb3e30ce2fb..0c765727598d6ab6174eb7d1faf6a2f4b5557ef4 100644 Binary files a/runs/Feb01_13-09-49_DITEC2014063010/events.out.tfevents.1706804084.DITEC2014063010.17912.0 and b/runs/Feb01_13-09-49_DITEC2014063010/events.out.tfevents.1706804084.DITEC2014063010.17912.0 differ diff --git a/runs/Feb05_12-30-35_DITEC2014063010/events.out.tfevents.1707147265.DITEC2014063010.20076.0 b/runs/Feb05_12-30-35_DITEC2014063010/events.out.tfevents.1707147265.DITEC2014063010.20076.0 index d86bb49a108490d57901141f586bd3c30af3e1a5..0a566ef14d8f2cdf2c444bee1a1e2213ab6d36ac 100644 Binary files a/runs/Feb05_12-30-35_DITEC2014063010/events.out.tfevents.1707147265.DITEC2014063010.20076.0 and b/runs/Feb05_12-30-35_DITEC2014063010/events.out.tfevents.1707147265.DITEC2014063010.20076.0 differ diff --git a/runs/Feb05_12-30-35_DITEC2014063010/events.out.tfevents.1708496511.DITEC2014063010.20076.1 b/runs/Feb05_12-30-35_DITEC2014063010/events.out.tfevents.1708496511.DITEC2014063010.20076.1 index bfaf18bc3f1f8e7f9ac36fe9e4b1539448ecb72f..be10e1fbb2c169231ee49ccb295505ef04136c5d 100644 Binary files a/runs/Feb05_12-30-35_DITEC2014063010/events.out.tfevents.1708496511.DITEC2014063010.20076.1 and b/runs/Feb05_12-30-35_DITEC2014063010/events.out.tfevents.1708496511.DITEC2014063010.20076.1 differ diff --git a/runs/Jan18_11-02-40_DITEC2014063010/events.out.tfevents.1705588605.DITEC2014063010.4840.0 b/runs/Jan18_11-02-40_DITEC2014063010/events.out.tfevents.1705588605.DITEC2014063010.4840.0 index 41c73bd33a87ba97ab7a4563731d7f370479539c..90b3fd3d4a6d30e11d393989901482674924a020 100644 Binary files a/runs/Jan18_11-02-40_DITEC2014063010/events.out.tfevents.1705588605.DITEC2014063010.4840.0 and b/runs/Jan18_11-02-40_DITEC2014063010/events.out.tfevents.1705588605.DITEC2014063010.4840.0 differ diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..71d2d980c29aacf726f1651ddf2ccab2c5abd069 --- /dev/null +++ b/train_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 1.29, + "train_loss": 0.05041759390830994, + "train_runtime": 1177508.0135, + "train_samples": 30998, + "train_samples_per_second": 0.034, + "train_steps_per_second": 0.004 +} \ No newline at end of file diff --git a/trainer_state.json b/trainer_state.json index e94ed50cd002bc6fa2d9bbd01625b4414e209ea4..965fa11ac885f5fea6e8fdba72e0a0c54d821aa6 100644 --- a/trainer_state.json +++ b/trainer_state.json @@ -1,9 +1,9 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 1.032258064516129, + "epoch": 1.2903225806451613, "eval_steps": 1000, - "global_step": 4000, + "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -1003,6 +1003,264 @@ "eval_steps_per_second": 0.006, "eval_wer": 1.000743374272786, "step": 4000 + }, + { + "epoch": 1.04, + "learning_rate": 3.263333333333333e-07, + "loss": 0.1092, + "step": 4025 + }, + { + "epoch": 1.05, + "learning_rate": 3.18e-07, + "loss": 0.0928, + "step": 4050 + }, + { + "epoch": 1.05, + "learning_rate": 3.096666666666666e-07, + "loss": 0.1481, + "step": 4075 + }, + { + "epoch": 1.06, + "learning_rate": 3.0133333333333333e-07, + "loss": 0.1043, + "step": 4100 + }, + { + "epoch": 1.06, + "learning_rate": 2.93e-07, + "loss": 0.1018, + "step": 4125 + }, + { + "epoch": 1.07, + "learning_rate": 2.8466666666666665e-07, + "loss": 0.104, + "step": 4150 + }, + { + "epoch": 1.08, + "learning_rate": 2.763333333333333e-07, + "loss": 0.1394, + "step": 4175 + }, + { + "epoch": 1.08, + "learning_rate": 2.68e-07, + "loss": 0.0962, + "step": 4200 + }, + { + "epoch": 1.09, + "learning_rate": 2.596666666666666e-07, + "loss": 0.1058, + "step": 4225 + }, + { + "epoch": 1.1, + "learning_rate": 2.5133333333333333e-07, + "loss": 0.0837, + "step": 4250 + }, + { + "epoch": 1.1, + "learning_rate": 2.43e-07, + "loss": 0.1396, + "step": 4275 + }, + { + "epoch": 1.11, + "learning_rate": 2.3466666666666665e-07, + "loss": 0.1005, + "step": 4300 + }, + { + "epoch": 1.12, + "learning_rate": 2.263333333333333e-07, + "loss": 0.131, + "step": 4325 + }, + { + "epoch": 1.12, + "learning_rate": 2.18e-07, + "loss": 0.1069, + "step": 4350 + }, + { + "epoch": 1.13, + "learning_rate": 2.0966666666666665e-07, + "loss": 0.136, + "step": 4375 + }, + { + "epoch": 1.14, + "learning_rate": 2.0133333333333334e-07, + "loss": 0.0954, + "step": 4400 + }, + { + "epoch": 1.14, + "learning_rate": 1.93e-07, + "loss": 0.1276, + "step": 4425 + }, + { + "epoch": 1.15, + "learning_rate": 1.8466666666666665e-07, + "loss": 0.1033, + "step": 4450 + }, + { + "epoch": 1.15, + "learning_rate": 1.7633333333333334e-07, + "loss": 0.1547, + "step": 4475 + }, + { + "epoch": 1.16, + "learning_rate": 1.68e-07, + "loss": 0.1093, + "step": 4500 + }, + { + "epoch": 1.17, + "learning_rate": 1.5966666666666668e-07, + "loss": 0.1435, + "step": 4525 + }, + { + "epoch": 1.17, + "learning_rate": 1.513333333333333e-07, + "loss": 0.1085, + "step": 4550 + }, + { + "epoch": 1.18, + "learning_rate": 1.4299999999999997e-07, + "loss": 0.1105, + "step": 4575 + }, + { + "epoch": 1.19, + "learning_rate": 1.3466666666666665e-07, + "loss": 0.0858, + "step": 4600 + }, + { + "epoch": 1.19, + "learning_rate": 1.263333333333333e-07, + "loss": 0.1336, + "step": 4625 + }, + { + "epoch": 1.2, + "learning_rate": 1.1799999999999998e-07, + "loss": 0.0857, + "step": 4650 + }, + { + "epoch": 1.21, + "learning_rate": 1.0966666666666666e-07, + "loss": 0.1146, + "step": 4675 + }, + { + "epoch": 1.21, + "learning_rate": 1.0133333333333333e-07, + "loss": 0.097, + "step": 4700 + }, + { + "epoch": 1.22, + "learning_rate": 9.3e-08, + "loss": 0.1146, + "step": 4725 + }, + { + "epoch": 1.23, + "learning_rate": 8.466666666666667e-08, + "loss": 0.0932, + "step": 4750 + }, + { + "epoch": 1.23, + "learning_rate": 7.633333333333333e-08, + "loss": 0.1245, + "step": 4775 + }, + { + "epoch": 1.24, + "learning_rate": 6.8e-08, + "loss": 0.0978, + "step": 4800 + }, + { + "epoch": 1.25, + "learning_rate": 5.966666666666666e-08, + "loss": 0.1283, + "step": 4825 + }, + { + "epoch": 1.25, + "learning_rate": 5.133333333333333e-08, + "loss": 0.1091, + "step": 4850 + }, + { + "epoch": 1.26, + "learning_rate": 4.2999999999999995e-08, + "loss": 0.1106, + "step": 4875 + }, + { + "epoch": 1.26, + "learning_rate": 3.4666666666666666e-08, + "loss": 0.1131, + "step": 4900 + }, + { + "epoch": 1.27, + "learning_rate": 2.633333333333333e-08, + "loss": 0.1219, + "step": 4925 + }, + { + "epoch": 1.28, + "learning_rate": 1.8e-08, + "loss": 0.1205, + "step": 4950 + }, + { + "epoch": 1.28, + "learning_rate": 9.666666666666667e-09, + "loss": 0.1408, + "step": 4975 + }, + { + "epoch": 1.29, + "learning_rate": 1.3333333333333333e-09, + "loss": 0.1292, + "step": 5000 + }, + { + "epoch": 1.29, + "eval_loss": 0.132488414645195, + "eval_runtime": 160662.2084, + "eval_samples_per_second": 0.059, + "eval_steps_per_second": 0.007, + "eval_wer": 0.9998545572074984, + "step": 5000 + }, + { + "epoch": 1.29, + "step": 5000, + "total_flos": 1.3589313726578688e+20, + "train_loss": 0.05041759390830994, + "train_runtime": 1177508.0135, + "train_samples_per_second": 0.034, + "train_steps_per_second": 0.004 } ], "logging_steps": 25, @@ -1010,7 +1268,7 @@ "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 1000, - "total_flos": 1.0871315081330688e+20, + "total_flos": 1.3589313726578688e+20, "train_batch_size": 8, "trial_name": null, "trial_params": null diff --git a/training_args.bin b/training_args.bin index 55243e524c6b8e62c8516df3c33e0beaed5b29d9..93e69a467867b9dbff737d816a19f85cc7227a5e 100644 Binary files a/training_args.bin and b/training_args.bin differ