Upload model

Browse files

Files changed (14) hide show

README.md +155 -0
added_tokens.json +100 -0
config.json +91 -0
generation_config.json +0 -0
model-00001-of-00003.safetensors +3 -0
model-00002-of-00003.safetensors +3 -0
model-00003-of-00003.safetensors +3 -0
model.safetensors.index.json +1020 -0
preprocessor_config.json +111 -0
sentencepiece.bpe.model +3 -0
special_tokens_map.json +144 -0
spm_char_lang38_tc.model +3 -0
tokenizer.model +3 -0
tokenizer_config.json +933 -0

README.md ADDED Viewed

	@@ -0,0 +1,155 @@

+---
+license: cc-by-nc-4.0
+---
+# SeamlessM4T-v2 T2TT Lite Model
+Extracted from `facebook/seamless-m4t-v2-large`, containing only T2TT (Text-to-Text Translation) components.
+> Original Model: [facebook/seamless-m4t-v2-large](https://huggingface.co/facebook/seamless-m4t-v2-large)
+>
+> Official Documentation: [SeamlessM4T-v2 Documentation](https://huggingface.co/docs/transformers/en/model_doc/seamless_m4t_v2)
+Note: This package only reorganizes publicly available weights from Meta's original model for T2TT usage. No new training or fine-tuning is introduced. All rights of the model and weights belong to their original owner.
+## Supported Features
+- **T2TT (Text-to-Text Translation)**: Multilingual text translation
+- **96 Languages**: Supports text translation between 96 languages
+## Included Components
+### Model Weights
+- `text_encoder`: Text encoder
+- `text_decoder`: Text decoder
+- `shared.weight`: Shared word embeddings
+- `lang_embed`: Language embeddings
+## Model Size
+- Original Model: ~8.6 GB
+- Lite Model: ~5.1 GB
+- Removed Weights: 1219 (speech_encoder, t2u_model, vocoder)
+- Space Saved: ~3.5 GB
+## Usage Examples
+### 1. Basic T2TT: Text-to-Text Translation
+```python
+from transformers import SeamlessM4Tv2Model, AutoProcessor
+# Load model
+model = SeamlessM4Tv2Model.from_pretrained("jaman21/seamless-m4t-v2-t2tt")
+processor = AutoProcessor.from_pretrained("jaman21/seamless-m4t-v2-t2tt")
+# Translate text
+text_inputs = processor(text="Hello, how are you?", src_lang="eng", return_tensors="pt")
+output_tokens = model.generate(**text_inputs, tgt_lang="fra", generate_speech=False)
+translated_text = processor.decode(output_tokens[0].tolist()[0], skip_special_tokens=True)
+print(translated_text)  # "Bonjour, comment allez-vous?"
+```
+### 2. Advanced Generation Strategies
+```python
+# Beam search for better quality (slower)
+text_inputs = processor(text="The quick brown fox jumps", src_lang="eng", return_tensors="pt")
+outputs = model.generate(
+    **text_inputs,
+    tgt_lang="jpn",
+    generate_speech=False,
+    num_beams=5,              # Use beam search
+    max_new_tokens=256,
+    early_stopping=True
+)
+# Sampling for more diverse output
+outputs = model.generate(
+    **text_inputs,
+    tgt_lang="kor",
+    generate_speech=False,
+    do_sample=True,           # Enable sampling
+    top_k=50,
+    top_p=0.95,
+    temperature=0.8           # Must be > 0: lower is more deterministic, higher is more random (affects translation quality)
+)
+```
+### 3. Batch Processing Multiple Texts
+```python
+# Process multiple texts at once
+texts = [
+    "Hello, how are you?",
+    "What is your name?",
+    "Nice to meet you!"
+]
+text_inputs = processor(text=texts, src_lang="eng", return_tensors="pt", padding=True)
+output_tokens = model.generate(**text_inputs, tgt_lang="ita", generate_speech=False)
+# Decode all outputs
+translations = processor.batch_decode(output_tokens[0], skip_special_tokens=True)  # output_tokens[0] holds the generated token sequences
+for orig, trans in zip(texts, translations):
+    print(f"{orig} -> {trans}")
+```
+### 4. Control Generation Length and Quality
+```python
+text_inputs = processor(text="Translate this sentence", src_lang="eng", return_tensors="pt")
+# Higher quality but more computationally expensive
+high_quality_output = model.generate(
+    **text_inputs,
+    tgt_lang="rus",
+    generate_speech=False,
+    num_beams=5,              # Beam search
+    max_new_tokens=512,       # Allow longer output
+    length_penalty=1.0,       # No length penalty
+    early_stopping=True,
+    use_cache=True            # Accelerate generation
+)
+# Faster generation speed, acceptable quality
+fast_output = model.generate(
+    **text_inputs,
+    tgt_lang="rus",
+    generate_speech=False,
+    num_beams=1,              # Greedy decoding: fastest, but usually lower quality than beam search
+    max_new_tokens=256,
+    use_cache=True
+)
+```
+### 5. GPU/CPU Usage
+```python
+import torch
+# Move model to GPU if available
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model = model.to(device)
+# Process inputs on the same device
+text_inputs = processor(text="Hello", src_lang="eng", return_tensors="pt")
+text_inputs = {k: v.to(device) for k, v in text_inputs.items()}
+# Generate
+with torch.inference_mode():  # More efficient than torch.no_grad()
+    outputs = model.generate(**text_inputs, tgt_lang="cmn", generate_speech=False)
+```
+## License
+Same as the original model: **CC-BY-NC-4.0**
+CC-BY-NC-4.0 permits non-commercial use only. For commercial use, please refer to Meta's licensing terms.
+## References
+- [SeamlessM4T-v2 Paper](https://arxiv.org/abs/2312.05187)
+- [Official Model Card](https://huggingface.co/facebook/seamless-m4t-v2-large)
+- [Transformers Documentation](https://huggingface.co/docs/transformers/en/model_doc/seamless_m4t_v2)
+- [GitHub Repository](https://github.com/facebookresearch/seamless_communication)

added_tokens.json ADDED Viewed

	@@ -0,0 +1,100 @@

+{
+  "__afr__": 256001,
+  "__amh__": 256002,
+  "__arb__": 256003,
+  "__ary__": 256004,
+  "__arz__": 256005,
+  "__asm__": 256006,
+  "__azj__": 256007,
+  "__bel__": 256008,
+  "__ben__": 256009,
+  "__bos__": 256010,
+  "__bul__": 256011,
+  "__cat__": 256012,
+  "__ceb__": 256013,
+  "__ces__": 256014,
+  "__ckb__": 256015,
+  "__cmn_Hant__": 256017,
+  "__cmn__": 256016,
+  "__cym__": 256018,
+  "__dan__": 256019,
+  "__deu__": 256020,
+  "__ell__": 256021,
+  "__eng__": 256022,
+  "__est__": 256023,
+  "__eus__": 256024,
+  "__fin__": 256025,
+  "__fra__": 256026,
+  "__fuv__": 256027,
+  "__gaz__": 256028,
+  "__gle__": 256029,
+  "__glg__": 256030,
+  "__guj__": 256031,
+  "__heb__": 256032,
+  "__hin__": 256033,
+  "__hrv__": 256034,
+  "__hun__": 256035,
+  "__hye__": 256036,
+  "__ibo__": 256037,
+  "__ind__": 256038,
+  "__isl__": 256039,
+  "__ita__": 256040,
+  "__jav__": 256041,
+  "__jpn__": 256042,
+  "__kan__": 256043,
+  "__kat__": 256044,
+  "__kaz__": 256045,
+  "__khk__": 256046,
+  "__khm__": 256047,
+  "__kir__": 256048,
+  "__kor__": 256049,
+  "__lao__": 256050,
+  "__lit__": 256051,
+  "__lug__": 256052,
+  "__luo__": 256053,
+  "__lvs__": 256054,
+  "__mai__": 256055,
+  "__mal__": 256056,
+  "__mar__": 256057,
+  "__mkd__": 256058,
+  "__mlt__": 256059,
+  "__mni__": 256060,
+  "__mya__": 256061,
+  "__nld__": 256062,
+  "__nno__": 256063,
+  "__nob__": 256064,
+  "__npi__": 256065,
+  "__nya__": 256066,
+  "__ory__": 256067,
+  "__pan__": 256068,
+  "__pbt__": 256069,
+  "__pes__": 256070,
+  "__pol__": 256071,
+  "__por__": 256072,
+  "__ron__": 256073,
+  "__rus__": 256074,
+  "__sat__": 256075,
+  "__slk__": 256076,
+  "__slv__": 256077,
+  "__sna__": 256078,
+  "__snd__": 256079,
+  "__som__": 256080,
+  "__spa__": 256081,
+  "__srp__": 256082,
+  "__swe__": 256083,
+  "__swh__": 256084,
+  "__tam__": 256085,
+  "__tel__": 256086,
+  "__tgk__": 256087,
+  "__tgl__": 256088,
+  "__tha__": 256089,
+  "__tur__": 256090,
+  "__ukr__": 256091,
+  "__urd__": 256092,
+  "__uzn__": 256093,
+  "__vie__": 256094,
+  "__yor__": 256095,
+  "__yue__": 256096,
+  "__zlm__": 256097,
+  "__zul__": 256098
+}

config.json ADDED Viewed

	@@ -0,0 +1,91 @@

+{
+  "activation_dropout": 0.0,
+  "activation_function": "relu",
+  "adaptor_dropout": 0.1,
+  "adaptor_kernel_size": 8,
+  "adaptor_stride": 8,
+  "add_adapter": true,
+  "architectures": [
+    "SeamlessM4Tv2Model"
+  ],
+  "attention_dropout": 0.1,
+  "bos_token_id": 2,
+  "char_vocab_size": 10943,
+  "conv_depthwise_kernel_size": 31,
+  "decoder_attention_heads": 16,
+  "decoder_ffn_dim": 8192,
+  "decoder_layerdrop": 0.05,
+  "decoder_layers": 24,
+  "decoder_start_token_id": 3,
+  "dropout": 0.1,
+  "encoder_attention_heads": 16,
+  "encoder_ffn_dim": 8192,
+  "encoder_layerdrop": 0.05,
+  "encoder_layers": 24,
+  "eos_token_id": 3,
+  "feature_projection_input_dim": 160,
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "is_encoder_decoder": true,
+  "lang_embed_dim": 256,
+  "layer_norm_eps": 1e-05,
+  "leaky_relu_slope": 0.1,
+  "left_max_position_embeddings": 64,
+  "max_new_tokens": 256,
+  "max_position_embeddings": 4096,
+  "model_type": "seamless_m4t_v2",
+  "num_adapter_layers": 1,
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "pad_token_id": 0,
+  "position_embeddings_type": "relative_key",
+  "resblock_dilation_sizes": [
+    [
+      1,
+      3,
+      5
+    ],
+    [
+      1,
+      3,
+      5
+    ],
+    [
+      1,
+      3,
+      5
+    ]
+  ],
+  "resblock_kernel_sizes": [
+    3,
+    7,
+    11
+  ],
+  "right_max_position_embeddings": 8,
+  "sampling_rate": 16000,
+  "scale_embedding": true,
+  "spkr_embed_dim": 256,
+  "torch_dtype": "float32",
+  "transformers_version": "4.36.0.dev0",
+  "unit_embed_dim": 1280,
+  "unit_hifi_gan_vocab_size": 10000,
+  "upsample_initial_channel": 512,
+  "upsample_kernel_sizes": [
+    11,
+    8,
+    8,
+    4,
+    4
+  ],
+  "upsample_rates": [
+    5,
+    4,
+    4,
+    2,
+    2
+  ],
+  "use_cache": true,
+  "var_pred_dropout": 0.5,
+  "variance_predictor_kernel_size": 3,
+  "vocab_size": 256102
+}

generation_config.json ADDED Viewed

The diff for this file is too large to render. See raw diff

model-00001-of-00003.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e613ef5f42064d4d02e0aff843abe52799b7d9e2812da03cc3368e0e8b7c61f1
+size 1490250264

model-00002-of-00003.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8399188880128295781ba474a1fa74baa001ffa3865e747d20289cfb9f8c2870
+size 2144776632

model-00003-of-00003.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7a9dfbc955e504c18baa08623913a88bb325b0d2db5d9c91282abf9ef759cc49
+size 1847220640

model.safetensors.index.json ADDED Viewed

	@@ -0,0 +1,1020 @@

+{
+  "metadata": {
+    "total_size": 5482127360
+  },
+  "weight_map": {
+    "text_encoder.layers.12.ffn_layer_norm.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.2.self_attn_layer_norm.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.3.cross_attention.k_proj.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.14.ffn_layer_norm.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.2.cross_attention.k_proj.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.17.ffn.fc1.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.18.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.7.ffn_layer_norm.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.22.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.22.ffn_layer_norm.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.16.ffn.fc1.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.13.self_attn_layer_norm.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.20.cross_attention_layer_norm.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.4.cross_attention.q_proj.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.21.self_attn.out_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.0.cross_attention.k_proj.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.15.ffn.fc2.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.10.self_attn_layer_norm.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.5.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.22.ffn_layer_norm.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.22.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.10.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.0.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.2.ffn.fc2.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.8.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.5.ffn_layer_norm.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.2.ffn.fc1.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.3.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.19.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.14.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.14.ffn.fc2.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.12.ffn.fc2.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.9.ffn_layer_norm.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.0.cross_attention.q_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.17.cross_attention.q_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.3.cross_attention.out_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.6.cross_attention_layer_norm.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.12.self_attn_layer_norm.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.8.self_attn_layer_norm.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.23.cross_attention.k_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.12.self_attn_layer_norm.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.1.ffn.fc2.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.7.cross_attention.q_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.1.ffn_layer_norm.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.18.ffn_layer_norm.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.15.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.14.cross_attention_layer_norm.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.1.self_attn_layer_norm.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.11.ffn.fc1.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.2.ffn.fc1.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.5.ffn.fc1.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.11.cross_attention_layer_norm.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.7.cross_attention_layer_norm.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.16.self_attn_layer_norm.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.15.cross_attention.out_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.19.cross_attention.out_proj.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.4.ffn.fc2.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.3.ffn_layer_norm.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.4.cross_attention.out_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.20.ffn_layer_norm.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.18.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.20.self_attn.out_proj.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.21.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.1.cross_attention.q_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.17.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.6.ffn_layer_norm.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.4.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.9.cross_attention_layer_norm.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.2.cross_attention_layer_norm.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.4.cross_attention.q_proj.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layer_norm.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.22.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.20.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.9.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.11.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.3.ffn_layer_norm.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.7.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.12.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.4.self_attn_layer_norm.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.13.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.16.ffn.fc2.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.7.ffn.fc2.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.9.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.3.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.20.ffn.fc1.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.5.cross_attention.q_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.12.ffn_layer_norm.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.20.ffn_layer_norm.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.0.self_attn_layer_norm.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.8.ffn.fc2.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.15.ffn_layer_norm.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.4.cross_attention.v_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.8.cross_attention_layer_norm.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.13.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.13.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.15.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.21.ffn.fc1.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.6.ffn.fc1.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.20.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.7.ffn.fc1.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.1.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.15.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.5.ffn.fc2.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.18.cross_attention.out_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.5.ffn.fc1.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.20.ffn_layer_norm.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.16.ffn_layer_norm.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.10.ffn_layer_norm.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.14.self_attn_layer_norm.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.7.self_attn_layer_norm.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.15.ffn_layer_norm.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.21.ffn_layer_norm.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.3.ffn.fc2.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.15.ffn.fc1.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.16.ffn.fc2.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.19.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.6.self_attn_layer_norm.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.10.cross_attention.v_proj.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.5.ffn.fc1.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.18.ffn.fc2.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.6.self_attn.out_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.20.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.5.self_attn_layer_norm.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.18.cross_attention.v_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.4.cross_attention_layer_norm.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.17.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layer_norm.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.15.self_attn_layer_norm.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.22.self_attn.out_proj.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.4.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.19.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.12.cross_attention.out_proj.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.11.ffn.fc2.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.17.ffn.fc1.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.5.ffn.fc1.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.9.ffn_layer_norm.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.1.self_attn.out_proj.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.22.self_attn_layer_norm.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.5.ffn_layer_norm.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.2.cross_attention.out_proj.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.0.self_attn.out_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.16.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.17.ffn.fc2.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.10.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.8.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.14.cross_attention.out_proj.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.11.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.20.cross_attention.out_proj.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.5.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.14.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.21.self_attn.out_proj.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.6.self_attn.out_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.19.self_attn.out_proj.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.0.ffn.fc1.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.14.ffn.fc2.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.9.ffn.fc1.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.15.cross_attention_layer_norm.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.17.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.7.cross_attention.out_proj.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.8.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.16.cross_attention_layer_norm.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.19.cross_attention.v_proj.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.14.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.8.ffn.fc1.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.10.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.1.ffn.fc1.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.18.ffn.fc1.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.0.self_attn.out_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.14.ffn_layer_norm.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.3.self_attn_layer_norm.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.23.cross_attention.q_proj.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.14.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.6.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.4.ffn.fc1.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.21.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.8.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.5.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.23.cross_attention.v_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.7.cross_attention.q_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.7.ffn.fc1.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.9.ffn.fc2.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.10.ffn_layer_norm.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.16.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.15.self_attn.out_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.21.cross_attention_layer_norm.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.6.cross_attention.k_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.7.cross_attention.v_proj.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.23.self_attn.out_proj.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.17.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.18.cross_attention.q_proj.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.2.ffn.fc2.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.18.self_attn_layer_norm.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.7.self_attn_layer_norm.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.3.cross_attention.out_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.20.cross_attention.q_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.4.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.18.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.13.self_attn.out_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.11.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.20.ffn.fc1.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.6.cross_attention.k_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.8.cross_attention.v_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.0.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.9.cross_attention.v_proj.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.3.ffn.fc1.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.3.ffn.fc2.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.0.ffn_layer_norm.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.19.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.15.ffn.fc2.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.6.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.18.self_attn.out_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.12.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.0.ffn.fc1.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.22.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.8.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.6.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.8.cross_attention.v_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.23.cross_attention.out_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.2.self_attn.out_proj.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.5.ffn.fc2.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.11.self_attn.out_proj.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.12.ffn.fc2.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.11.cross_attention.q_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.16.cross_attention.out_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.0.ffn.fc2.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.5.cross_attention.k_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.7.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.19.cross_attention.q_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.0.ffn_layer_norm.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.11.cross_attention.q_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.17.ffn_layer_norm.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.2.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.10.cross_attention.v_proj.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.2.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.23.self_attn.out_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.13.ffn_layer_norm.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.17.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.10.cross_attention.out_proj.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.12.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.20.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.5.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.13.cross_attention.v_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.12.cross_attention.v_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.6.self_attn_layer_norm.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.1.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.6.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.3.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.10.self_attn.out_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.7.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.11.ffn.fc1.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.19.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.7.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.9.ffn_layer_norm.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.10.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.12.ffn.fc1.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.17.cross_attention.q_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.22.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.9.cross_attention.k_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.9.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.19.self_attn_layer_norm.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.5.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.7.cross_attention_layer_norm.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.13.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.1.cross_attention.q_proj.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.23.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.12.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.20.self_attn_layer_norm.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.16.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.21.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.18.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.23.ffn.fc2.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.21.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.22.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.0.ffn.fc1.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.14.ffn.fc1.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.4.cross_attention.k_proj.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.7.self_attn.out_proj.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.4.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.22.cross_attention.v_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.1.cross_attention.out_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.13.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.10.cross_attention_layer_norm.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.23.ffn.fc1.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.3.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.0.self_attn_layer_norm.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.1.ffn_layer_norm.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.19.ffn.fc1.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.4.self_attn_layer_norm.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.6.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.10.cross_attention.k_proj.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.18.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.15.cross_attention.k_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.22.self_attn_layer_norm.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.15.self_attn.out_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.14.cross_attention.q_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.19.cross_attention.q_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.23.cross_attention.out_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.5.ffn_layer_norm.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.1.self_attn.out_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.19.ffn_layer_norm.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.9.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.2.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.23.ffn_layer_norm.bias": "model-00001-of-00003.safetensors",
+    "text_encoder.layers.6.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.0.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.21.cross_attention.v_proj.bias": "model-00001-of-00003.safetensors",
+    "text_decoder.layers.21.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
+    "shared.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.9.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.19.ffn.fc2.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.12.self_attn_layer_norm.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.11.cross_attention.k_proj.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.10.ffn.fc2.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.18.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.13.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.19.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.3.self_attn_layer_norm.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.21.self_attn_layer_norm.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.4.self_attn.out_proj.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.12.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.10.ffn.fc2.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.11.cross_attention_layer_norm.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.9.self_attn.out_proj.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.17.self_attn_layer_norm.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.14.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.3.cross_attention.q_proj.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.16.self_attn_layer_norm.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.21.ffn.fc1.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.0.self_attn.out_proj.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.11.cross_attention.v_proj.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.8.self_attn_layer_norm.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.15.cross_attention.v_proj.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.16.self_attn_layer_norm.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.5.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.23.ffn.fc1.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.6.ffn.fc1.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.10.cross_attention.k_proj.weight": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.21.self_attn_layer_norm.weight": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.12.self_attn.out_proj.weight": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.5.ffn.fc2.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.23.ffn.fc2.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.17.cross_attention.v_proj.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.13.cross_attention.out_proj.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.1.ffn.fc2.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.16.ffn.fc1.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.8.cross_attention_layer_norm.weight": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.6.ffn_layer_norm.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.1.cross_attention.k_proj.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.11.cross_attention.out_proj.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.11.self_attn_layer_norm.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.8.cross_attention.q_proj.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.23.ffn_layer_norm.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.4.self_attn_layer_norm.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.5.cross_attention.out_proj.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.23.cross_attention_layer_norm.weight": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.20.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.22.cross_attention.k_proj.weight": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.19.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.0.ffn.fc1.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.18.cross_attention.out_proj.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.0.cross_attention.q_proj.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.23.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.4.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.10.self_attn_layer_norm.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.10.self_attn.out_proj.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.12.self_attn.out_proj.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.20.cross_attention.k_proj.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.9.cross_attention_layer_norm.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.22.cross_attention.q_proj.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.20.cross_attention.v_proj.weight": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.20.self_attn.out_proj.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.4.self_attn.out_proj.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.16.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.5.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.17.ffn_layer_norm.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.12.cross_attention.q_proj.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.5.ffn_layer_norm.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.15.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.6.ffn.fc1.weight": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.8.self_attn_layer_norm.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.8.self_attn.out_proj.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.6.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.0.cross_attention_layer_norm.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.16.self_attn.out_proj.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.10.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.15.self_attn_layer_norm.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.1.self_attn_layer_norm.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.1.ffn.fc1.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.1.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.10.self_attn_layer_norm.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.3.ffn.fc2.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.14.cross_attention.q_proj.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.16.cross_attention.q_proj.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.18.ffn.fc2.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.2.ffn.fc1.weight": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.7.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.1.cross_attention_layer_norm.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.3.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.22.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.3.cross_attention.v_proj.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.9.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.18.ffn.fc2.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.17.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.18.ffn.fc1.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.7.ffn_layer_norm.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.18.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.19.ffn.fc1.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.11.self_attn_layer_norm.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.19.cross_attention.v_proj.weight": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.14.ffn.fc2.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.1.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.19.ffn_layer_norm.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.2.cross_attention.out_proj.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.20.cross_attention.v_proj.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.5.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.6.cross_attention.out_proj.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.4.cross_attention.v_proj.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.21.cross_attention.k_proj.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.0.self_attn_layer_norm.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.2.self_attn_layer_norm.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.16.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.18.ffn.fc1.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.17.self_attn_layer_norm.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.11.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.15.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.21.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.8.ffn.fc2.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.21.self_attn.out_proj.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.12.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.9.self_attn_layer_norm.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.20.self_attn.out_proj.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.14.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.11.ffn_layer_norm.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.10.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.16.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.22.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.2.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.21.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.21.cross_attention.out_proj.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.7.ffn.fc2.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.4.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.17.cross_attention.out_proj.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.12.cross_attention.k_proj.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.11.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.7.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.16.cross_attention.k_proj.weight": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.13.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.8.ffn.fc2.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.11.cross_attention.k_proj.weight": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.0.self_attn.out_proj.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.15.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.4.ffn.fc1.weight": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.17.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.5.ffn.fc2.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.10.ffn.fc1.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.4.cross_attention.out_proj.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.12.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.7.ffn.fc2.weight": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.5.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.12.ffn_layer_norm.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.22.cross_attention.q_proj.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.13.ffn.fc1.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.16.ffn_layer_norm.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.20.ffn.fc2.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.14.self_attn.out_proj.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.3.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.13.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.4.ffn.fc2.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.20.ffn.fc1.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.10.cross_attention.out_proj.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.19.cross_attention_layer_norm.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.6.cross_attention.q_proj.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.3.cross_attention.v_proj.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.12.ffn.fc1.weight": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.15.ffn.fc1.weight": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.12.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.10.cross_attention.q_proj.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.13.self_attn_layer_norm.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.5.self_attn_layer_norm.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.1.self_attn.k_proj.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.0.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.3.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.23.ffn.fc1.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.2.cross_attention.q_proj.weight": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.2.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.10.ffn.fc2.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.22.ffn.fc1.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.14.cross_attention.v_proj.weight": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.3.self_attn_layer_norm.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.16.cross_attention.out_proj.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.9.cross_attention.v_proj.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.12.self_attn_layer_norm.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.10.ffn.fc1.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.9.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.4.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.20.ffn.fc1.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.23.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.6.self_attn_layer_norm.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.7.self_attn_layer_norm.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.18.cross_attention_layer_norm.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.22.cross_attention.out_proj.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.12.self_attn.out_proj.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.3.cross_attention.q_proj.weight": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.13.ffn.fc1.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.18.ffn_layer_norm.weight": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.9.self_attn_layer_norm.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.17.self_attn.out_proj.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.3.ffn.fc1.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.2.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.23.self_attn.out_proj.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.22.cross_attention_layer_norm.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.22.ffn.fc2.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.19.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.13.ffn.fc2.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.18.cross_attention_layer_norm.weight": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.7.ffn.fc1.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.13.ffn_layer_norm.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.1.self_attn.out_proj.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.3.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.3.cross_attention_layer_norm.weight": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.22.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.5.self_attn_layer_norm.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.0.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.23.cross_attention.k_proj.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.4.ffn_layer_norm.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.13.ffn.fc1.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.23.cross_attention.q_proj.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.15.cross_attention.out_proj.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.6.ffn.fc1.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.20.ffn_layer_norm.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.20.ffn.fc2.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.11.self_attn.out_proj.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.6.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.8.ffn.fc1.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.22.ffn.fc1.weight": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.22.self_attn.q_proj.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.7.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.15.ffn_layer_norm.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.14.self_attn.out_proj.weight": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.17.ffn.fc2.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.12.ffn.fc2.weight": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.14.ffn.fc1.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.6.cross_attention.v_proj.bias": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.19.self_attn.v_proj.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.17.ffn_layer_norm.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.11.self_attn.out_proj.bias": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.10.self_attn.out_proj.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.19.cross_attention.out_proj.weight": "model-00002-of-00003.safetensors",
+    "text_encoder.layers.16.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.2.cross_attention.v_proj.weight": "model-00002-of-00003.safetensors",
+    "text_decoder.layers.5.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.11.self_attn.out_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.18.cross_attention.v_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.2.self_attn.out_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.21.cross_attention.q_proj.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.16.self_attn.out_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.19.self_attn_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.21.cross_attention.k_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.8.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.13.ffn.fc2.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.0.cross_attention.out_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.13.cross_attention.q_proj.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.12.self_attn.q_proj.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.7.self_attn.out_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.5.cross_attention_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.5.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.8.ffn_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.5.self_attn.out_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.15.ffn.fc2.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.5.self_attn.out_proj.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.0.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.14.ffn_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.1.cross_attention.v_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.9.cross_attention.out_proj.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.9.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.13.self_attn.q_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.11.self_attn.v_proj.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.2.self_attn_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.0.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.18.self_attn.out_proj.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.17.ffn.fc1.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.14.self_attn.out_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.13.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.15.cross_attention.v_proj.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.0.ffn_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.16.self_attn.q_proj.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.3.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.16.cross_attention.q_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.22.self_attn.q_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.0.cross_attention.k_proj.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.1.ffn.fc2.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.2.cross_attention.v_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.4.cross_attention_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.7.self_attn.k_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.19.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.15.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.7.cross_attention.out_proj.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.4.ffn_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.3.ffn.fc1.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.14.cross_attention.k_proj.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.13.ffn_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.11.self_attn.q_proj.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.17.self_attn_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.0.cross_attention.v_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.3.ffn.fc2.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.22.self_attn.out_proj.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.22.ffn.fc1.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.16.ffn.fc2.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.0.ffn_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.23.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.1.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.4.ffn.fc2.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.6.ffn_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.22.cross_attention.k_proj.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.17.ffn_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.20.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.9.self_attn.out_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.21.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.18.self_attn.out_proj.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.10.self_attn.q_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.3.self_attn.out_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.9.self_attn.out_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.8.ffn_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.11.ffn_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.22.self_attn_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.21.cross_attention_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.14.ffn.fc1.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.2.self_attn.q_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.16.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.17.ffn.fc2.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.16.cross_attention.v_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.7.ffn.fc2.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.13.ffn.fc2.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.16.ffn_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.7.self_attn.q_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.15.ffn.fc1.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.12.cross_attention_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.13.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.8.cross_attention.out_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.10.ffn.fc1.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.7.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.9.ffn.fc2.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.9.cross_attention.out_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.23.ffn.fc2.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.21.self_attn_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.3.cross_attention_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.7.ffn_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.7.cross_attention.k_proj.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.16.ffn.fc1.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.8.self_attn.k_proj.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.9.ffn.fc1.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.10.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.21.ffn_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.16.self_attn.out_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.15.ffn_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.17.cross_attention_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.10.self_attn_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.6.self_attn.k_proj.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.21.ffn_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.17.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.1.cross_attention_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.17.cross_attention.out_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.18.self_attn_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.4.self_attn.out_proj.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.21.self_attn_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.13.cross_attention.q_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.19.self_attn_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.10.self_attn.q_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.9.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.11.cross_attention.out_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.4.self_attn.out_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.5.self_attn.out_proj.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.1.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.23.self_attn.k_proj.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.12.ffn_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.2.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.18.ffn_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.2.ffn_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.20.self_attn_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.2.self_attn_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.5.cross_attention.q_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.16.self_attn_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.1.self_attn.k_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.14.cross_attention.out_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.6.cross_attention.v_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.16.cross_attention_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.0.cross_attention.v_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.8.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.9.ffn.fc1.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.6.self_attn.out_proj.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.18.self_attn.out_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.15.cross_attention.q_proj.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.20.self_attn.v_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.17.ffn.fc1.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.8.ffn_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.8.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.17.cross_attention.v_proj.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.0.ffn.fc2.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.3.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.11.ffn.fc2.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.20.ffn.fc2.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.3.self_attn_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.8.self_attn.out_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.9.ffn.fc2.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.13.cross_attention.k_proj.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.2.ffn.fc2.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.7.cross_attention.k_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.12.cross_attention.k_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.21.cross_attention.v_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.0.cross_attention_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.2.cross_attention_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.7.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.18.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.10.ffn.fc2.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.23.cross_attention_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.23.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.9.self_attn.k_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.4.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.8.cross_attention.q_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.20.cross_attention.k_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.19.ffn.fc2.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.4.ffn_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.17.cross_attention.k_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.21.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.19.ffn.fc2.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.13.self_attn_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.13.cross_attention_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.23.ffn_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.15.cross_attention_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.5.cross_attention.out_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.17.self_attn_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.8.ffn.fc1.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.1.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.14.self_attn.k_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.11.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.2.ffn.fc1.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.18.ffn.fc1.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.19.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.13.ffn.fc2.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.19.ffn.fc1.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.14.cross_attention.v_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.7.ffn_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.0.self_attn.k_proj.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.16.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.20.self_attn.out_proj.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.9.ffn_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.18.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.5.self_attn_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.17.cross_attention.k_proj.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.18.self_attn_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.8.self_attn.out_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.19.ffn.fc1.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.4.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.21.ffn.fc2.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.8.ffn.fc1.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.10.cross_attention.q_proj.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.20.ffn.fc2.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.1.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.8.self_attn.out_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.19.cross_attention_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.3.ffn_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.10.ffn.fc1.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.2.self_attn.k_proj.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.23.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.20.cross_attention.q_proj.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.4.ffn.fc1.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.20.self_attn_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.14.ffn.fc1.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.23.self_attn.q_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.20.cross_attention.out_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.3.self_attn.out_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.19.cross_attention.k_proj.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.12.ffn.fc1.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.18.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.11.ffn_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.23.cross_attention.v_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.15.cross_attention.k_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.5.cross_attention.v_proj.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.23.self_attn_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.11.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.3.cross_attention.k_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.4.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.2.self_attn.q_proj.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.22.ffn.fc2.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.6.ffn_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.19.ffn_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.14.self_attn.out_proj.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.16.ffn_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.6.ffn.fc2.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.12.cross_attention.q_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.6.ffn.fc2.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.3.self_attn.out_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.19.self_attn.out_proj.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.21.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.15.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.10.cross_attention_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.3.ffn_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.8.cross_attention.k_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.4.ffn_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.9.cross_attention.q_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.7.cross_attention.v_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.4.ffn.fc1.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.17.self_attn.q_proj.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.4.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.13.cross_attention.v_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.14.self_attn_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.0.self_attn.q_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.1.cross_attention.k_proj.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.19.ffn_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.15.self_attn.k_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.20.self_attn.v_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.16.ffn.fc1.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.2.ffn.fc2.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.23.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.12.cross_attention.out_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.9.cross_attention.k_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.17.cross_attention_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.20.self_attn.k_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.23.ffn.fc1.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.2.self_attn.out_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.22.cross_attention.v_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.4.cross_attention.k_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.15.self_attn.out_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.11.ffn.fc1.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.5.cross_attention.k_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.1.cross_attention.v_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.7.self_attn.v_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.21.ffn.fc1.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.18.cross_attention.k_proj.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.0.ffn.fc2.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.23.self_attn.k_proj.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.18.ffn_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.0.ffn.fc2.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.19.self_attn.out_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.2.cross_attention.k_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.22.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.6.ffn.fc2.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.18.ffn.fc2.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.1.cross_attention.out_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.13.self_attn_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.23.self_attn_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.10.ffn_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.1.ffn.fc2.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.13.cross_attention.k_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.1.self_attn.out_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.23.self_attn.out_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.8.ffn.fc2.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.17.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.19.ffn.fc2.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.10.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.13.self_attn.out_proj.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.16.self_attn.out_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.18.self_attn.k_proj.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.0.self_attn.q_proj.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.11.ffn.fc2.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.23.self_attn.v_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.7.self_attn.out_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.0.cross_attention.out_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.11.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.3.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.22.ffn.fc2.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.21.ffn.fc1.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.3.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.23.ffn.fc2.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.9.self_attn.q_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.10.ffn_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.22.ffn_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.6.cross_attention_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.8.cross_attention.out_proj.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.11.self_attn.v_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.2.ffn_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.18.cross_attention.q_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.14.self_attn.v_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.15.self_attn_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.22.cross_attention_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.1.ffn.fc1.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.0.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.14.ffn.fc2.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.1.ffn.fc1.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.22.ffn.fc1.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.22.ffn_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.22.self_attn_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.21.ffn_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.22.cross_attention.out_proj.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.23.ffn_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.17.self_attn.out_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.12.self_attn.q_proj.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.1.self_attn_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.22.self_attn.out_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.21.ffn.fc2.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.21.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.9.self_attn.v_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.5.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.6.cross_attention.q_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.17.ffn.fc2.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.8.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.8.cross_attention.k_proj.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.17.self_attn.out_proj.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.2.ffn_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.13.cross_attention_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.1.ffn_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.13.self_attn.out_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.8.ffn_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.1.self_attn_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.14.self_attn.q_proj.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.7.ffn.fc1.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.0.self_attn_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.23.self_attn_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.8.self_attn.q_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.2.ffn_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.15.self_attn.v_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.19.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.14.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.22.self_attn.out_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.2.cross_attention.q_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.12.cross_attention.v_proj.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.15.self_attn.v_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.15.ffn.fc1.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.18.cross_attention.k_proj.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.7.self_attn.out_proj.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.16.self_attn.v_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.10.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.12.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.21.ffn.fc2.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.13.ffn_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.11.ffn.fc1.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.23.self_attn.v_proj.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.1.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.19.cross_attention.k_proj.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.14.ffn_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.9.self_attn.out_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.9.ffn.fc1.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.14.self_attn_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.21.cross_attention.q_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.9.cross_attention.q_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.5.self_attn.out_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.12.ffn.fc2.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.21.ffn.fc2.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.14.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.12.ffn.fc1.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.11.self_attn_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.8.self_attn_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.9.self_attn_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.1.ffn_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.5.cross_attention_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.22.ffn.fc2.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.6.ffn.fc2.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.13.ffn.fc1.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.12.self_attn.out_proj.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.15.ffn.fc2.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.14.self_attn.q_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.11.cross_attention.v_proj.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.14.self_attn_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.11.ffn_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.3.self_attn.out_proj.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.20.self_attn.k_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.16.ffn.fc2.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.6.cross_attention.out_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.3.ffn.fc1.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.11.self_attn_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.20.cross_attention_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.13.self_attn.q_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.16.cross_attention.v_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.18.self_attn.v_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.21.self_attn.out_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.5.cross_attention.v_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.4.ffn.fc2.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.17.self_attn.out_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.21.cross_attention.out_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.15.cross_attention.q_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.16.cross_attention.k_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.6.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.12.cross_attention_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.11.self_attn.k_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.23.self_attn_layer_norm.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.14.cross_attention_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.19.self_attn_layer_norm.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.9.ffn.fc2.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.13.cross_attention.out_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.0.self_attn.v_proj.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.11.ffn.fc2.bias": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.21.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+    "text_decoder.layers.14.cross_attention.k_proj.weight": "model-00003-of-00003.safetensors",
+    "text_encoder.layers.9.self_attn_layer_norm.bias": "model-00003-of-00003.safetensors"
+  }
+}

preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,111 @@

+{
+  "feature_extractor_type": "SeamlessM4TFeatureExtractor",
+  "feature_size": 80,
+  "language_code": [
+    "__afr__",
+    "__amh__",
+    "__arb__",
+    "__ary__",
+    "__arz__",
+    "__asm__",
+    "__azj__",
+    "__bel__",
+    "__ben__",
+    "__bos__",
+    "__bul__",
+    "__cat__",
+    "__ceb__",
+    "__ces__",
+    "__ckb__",
+    "__cmn__",
+    "__cmn_Hant__",
+    "__cym__",
+    "__dan__",
+    "__deu__",
+    "__ell__",
+    "__eng__",
+    "__est__",
+    "__eus__",
+    "__fin__",
+    "__fra__",
+    "__fuv__",
+    "__gaz__",
+    "__gle__",
+    "__glg__",
+    "__guj__",
+    "__heb__",
+    "__hin__",
+    "__hrv__",
+    "__hun__",
+    "__hye__",
+    "__ibo__",
+    "__ind__",
+    "__isl__",
+    "__ita__",
+    "__jav__",
+    "__jpn__",
+    "__kan__",
+    "__kat__",
+    "__kaz__",
+    "__khk__",
+    "__khm__",
+    "__kir__",
+    "__kor__",
+    "__lao__",
+    "__lit__",
+    "__lug__",
+    "__luo__",
+    "__lvs__",
+    "__mai__",
+    "__mal__",
+    "__mar__",
+    "__mkd__",
+    "__mlt__",
+    "__mni__",
+    "__mya__",
+    "__nld__",
+    "__nno__",
+    "__nob__",
+    "__npi__",
+    "__nya__",
+    "__ory__",
+    "__pan__",
+    "__pbt__",
+    "__pes__",
+    "__pol__",
+    "__por__",
+    "__ron__",
+    "__rus__",
+    "__sat__",
+    "__slk__",
+    "__slv__",
+    "__sna__",
+    "__snd__",
+    "__som__",
+    "__spa__",
+    "__srp__",
+    "__swe__",
+    "__swh__",
+    "__tam__",
+    "__tel__",
+    "__tgk__",
+    "__tgl__",
+    "__tha__",
+    "__tur__",
+    "__ukr__",
+    "__urd__",
+    "__uzn__",
+    "__vie__",
+    "__yor__",
+    "__yue__",
+    "__zlm__",
+    "__zul__"
+  ],
+  "num_mel_bins": 80,
+  "padding_side": "right",
+  "padding_value": 0.0,
+  "processor_class": "SeamlessM4TProcessor",
+  "return_attention_mask": true,
+  "sampling_rate": 16000,
+  "stride": 2
+}

sentencepiece.bpe.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:026a76827537db9f1348e4d5aaa127bb10a2f2ff633243f3a52d16be82d73f9d
+size 5165809

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,144 @@

+{
+  "additional_special_tokens": [
+    "__afr__",
+    "__amh__",
+    "__arb__",
+    "__ary__",
+    "__arz__",
+    "__asm__",
+    "__azj__",
+    "__bel__",
+    "__ben__",
+    "__bos__",
+    "__bul__",
+    "__cat__",
+    "__ceb__",
+    "__ces__",
+    "__ckb__",
+    "__cmn__",
+    "__cmn_Hant__",
+    "__cym__",
+    "__dan__",
+    "__deu__",
+    "__ell__",
+    "__eng__",
+    "__est__",
+    "__eus__",
+    "__fin__",
+    "__fra__",
+    "__fuv__",
+    "__gaz__",
+    "__gle__",
+    "__glg__",
+    "__guj__",
+    "__heb__",
+    "__hin__",
+    "__hrv__",
+    "__hun__",
+    "__hye__",
+    "__ibo__",
+    "__ind__",
+    "__isl__",
+    "__ita__",
+    "__jav__",
+    "__jpn__",
+    "__kan__",
+    "__kat__",
+    "__kaz__",
+    "__khk__",
+    "__khm__",
+    "__kir__",
+    "__kor__",
+    "__lao__",
+    "__lit__",
+    "__lug__",
+    "__luo__",
+    "__lvs__",
+    "__mai__",
+    "__mal__",
+    "__mar__",
+    "__mkd__",
+    "__mlt__",
+    "__mni__",
+    "__mya__",
+    "__nld__",
+    "__nno__",
+    "__nob__",
+    "__npi__",
+    "__nya__",
+    "__ory__",
+    "__pan__",
+    "__pbt__",
+    "__pes__",
+    "__pol__",
+    "__por__",
+    "__ron__",
+    "__rus__",
+    "__sat__",
+    "__slk__",
+    "__slv__",
+    "__sna__",
+    "__snd__",
+    "__som__",
+    "__spa__",
+    "__srp__",
+    "__swe__",
+    "__swh__",
+    "__tam__",
+    "__tel__",
+    "__tgk__",
+    "__tgl__",
+    "__tha__",
+    "__tur__",
+    "__ukr__",
+    "__urd__",
+    "__uzn__",
+    "__vie__",
+    "__yor__",
+    "__yue__",
+    "__zlm__",
+    "__zul__"
+  ],
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "cls_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

spm_char_lang38_tc.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9e7f2075dbc38dbe11d2414bfa4fb8e900022e87bbff4f74c97817e32a7ab493
+size 368901

tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:026a76827537db9f1348e4d5aaa127bb10a2f2ff633243f3a52d16be82d73f9d
+size 5165809

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,933 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256001": {
+      "content": "__afr__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256002": {
+      "content": "__amh__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256003": {
+      "content": "__arb__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256004": {
+      "content": "__ary__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256005": {
+      "content": "__arz__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256006": {
+      "content": "__asm__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256007": {
+      "content": "__azj__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256008": {
+      "content": "__bel__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256009": {
+      "content": "__ben__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256010": {
+      "content": "__bos__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256011": {
+      "content": "__bul__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256012": {
+      "content": "__cat__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256013": {
+      "content": "__ceb__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256014": {
+      "content": "__ces__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256015": {
+      "content": "__ckb__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256016": {
+      "content": "__cmn__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256017": {
+      "content": "__cmn_Hant__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256018": {
+      "content": "__cym__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256019": {
+      "content": "__dan__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256020": {
+      "content": "__deu__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256021": {
+      "content": "__ell__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256022": {
+      "content": "__eng__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256023": {
+      "content": "__est__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256024": {
+      "content": "__eus__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256025": {
+      "content": "__fin__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256026": {
+      "content": "__fra__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256027": {
+      "content": "__fuv__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256028": {
+      "content": "__gaz__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256029": {
+      "content": "__gle__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256030": {
+      "content": "__glg__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256031": {
+      "content": "__guj__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256032": {
+      "content": "__heb__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256033": {
+      "content": "__hin__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256034": {
+      "content": "__hrv__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256035": {
+      "content": "__hun__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256036": {
+      "content": "__hye__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256037": {
+      "content": "__ibo__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256038": {
+      "content": "__ind__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256039": {
+      "content": "__isl__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256040": {
+      "content": "__ita__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256041": {
+      "content": "__jav__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256042": {
+      "content": "__jpn__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256043": {
+      "content": "__kan__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256044": {
+      "content": "__kat__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256045": {
+      "content": "__kaz__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256046": {
+      "content": "__khk__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256047": {
+      "content": "__khm__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256048": {
+      "content": "__kir__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256049": {
+      "content": "__kor__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256050": {
+      "content": "__lao__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256051": {
+      "content": "__lit__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256052": {
+      "content": "__lug__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256053": {
+      "content": "__luo__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256054": {
+      "content": "__lvs__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256055": {
+      "content": "__mai__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256056": {
+      "content": "__mal__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256057": {
+      "content": "__mar__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256058": {
+      "content": "__mkd__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256059": {
+      "content": "__mlt__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256060": {
+      "content": "__mni__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256061": {
+      "content": "__mya__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256062": {
+      "content": "__nld__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256063": {
+      "content": "__nno__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256064": {
+      "content": "__nob__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256065": {
+      "content": "__npi__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256066": {
+      "content": "__nya__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256067": {
+      "content": "__ory__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256068": {
+      "content": "__pan__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256069": {
+      "content": "__pbt__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256070": {
+      "content": "__pes__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256071": {
+      "content": "__pol__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256072": {
+      "content": "__por__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256073": {
+      "content": "__ron__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256074": {
+      "content": "__rus__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256075": {
+      "content": "__sat__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256076": {
+      "content": "__slk__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256077": {
+      "content": "__slv__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256078": {
+      "content": "__sna__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256079": {
+      "content": "__snd__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256080": {
+      "content": "__som__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256081": {
+      "content": "__spa__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256082": {
+      "content": "__srp__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256083": {
+      "content": "__swe__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256084": {
+      "content": "__swh__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256085": {
+      "content": "__tam__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256086": {
+      "content": "__tel__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256087": {
+      "content": "__tgk__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256088": {
+      "content": "__tgl__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256089": {
+      "content": "__tha__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256090": {
+      "content": "__tur__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256091": {
+      "content": "__ukr__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256092": {
+      "content": "__urd__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256093": {
+      "content": "__uzn__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256094": {
+      "content": "__vie__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256095": {
+      "content": "__yor__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256096": {
+      "content": "__yue__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256097": {
+      "content": "__zlm__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256098": {
+      "content": "__zul__",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "__afr__",
+    "__amh__",
+    "__arb__",
+    "__ary__",
+    "__arz__",
+    "__asm__",
+    "__azj__",
+    "__bel__",
+    "__ben__",
+    "__bos__",
+    "__bul__",
+    "__cat__",
+    "__ceb__",
+    "__ces__",
+    "__ckb__",
+    "__cmn__",
+    "__cmn_Hant__",
+    "__cym__",
+    "__dan__",
+    "__deu__",
+    "__ell__",
+    "__eng__",
+    "__est__",
+    "__eus__",
+    "__fin__",
+    "__fra__",
+    "__fuv__",
+    "__gaz__",
+    "__gle__",
+    "__glg__",
+    "__guj__",
+    "__heb__",
+    "__hin__",
+    "__hrv__",
+    "__hun__",
+    "__hye__",
+    "__ibo__",
+    "__ind__",
+    "__isl__",
+    "__ita__",
+    "__jav__",
+    "__jpn__",
+    "__kan__",
+    "__kat__",
+    "__kaz__",
+    "__khk__",
+    "__khm__",
+    "__kir__",
+    "__kor__",
+    "__lao__",
+    "__lit__",
+    "__lug__",
+    "__luo__",
+    "__lvs__",
+    "__mai__",
+    "__mal__",
+    "__mar__",
+    "__mkd__",
+    "__mlt__",
+    "__mni__",
+    "__mya__",
+    "__nld__",
+    "__nno__",
+    "__nob__",
+    "__npi__",
+    "__nya__",
+    "__ory__",
+    "__pan__",
+    "__pbt__",
+    "__pes__",
+    "__pol__",
+    "__por__",
+    "__ron__",
+    "__rus__",
+    "__sat__",
+    "__slk__",
+    "__slv__",
+    "__sna__",
+    "__snd__",
+    "__som__",
+    "__spa__",
+    "__srp__",
+    "__swe__",
+    "__swh__",
+    "__tam__",
+    "__tel__",
+    "__tgk__",
+    "__tgl__",
+    "__tha__",
+    "__tur__",
+    "__ukr__",
+    "__urd__",
+    "__uzn__",
+    "__vie__",
+    "__yor__",
+    "__yue__",
+    "__zlm__",
+    "__zul__"
+  ],
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<pad>",
+  "processor_class": "SeamlessM4TProcessor",
+  "sep_token": "</s>",
+  "sp_model_kwargs": {},
+  "src_lang": "__eng__",
+  "tgt_lang": "__fra__",
+  "tokenizer_class": "SeamlessM4TTokenizer",
+  "unk_token": "<unk>"
+}