{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "markdown", "source": [ "# Convert a HF finetuned Whisper model to GGML\n", "\n", "Reference: https://github.com/ggerganov/whisper.cpp/tree/master/models#fine-tuned-models" ], "metadata": { "id": "nZPl81t1Ruvk" } }, { "cell_type": "code", "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "jzgovx6mRpHc", "outputId": "d95a18f3-579e-427a-d904-3976ecd6d896" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Reading package lists... Done\n", "Building dependency tree \n", "Reading state information... Done\n", "git-lfs is already the newest version (2.9.2-1).\n", "0 upgraded, 0 newly installed, 0 to remove and 23 not upgraded.\n", "fatal: destination path 'whisper' already exists and is not an empty directory.\n", "fatal: destination path 'whisper.cpp' already exists and is not an empty directory.\n", "fatal: destination path 'whisper-small-eu-v2' already exists and is not an empty directory.\n" ] } ], "source": [ "# Download the repos\n", "!git clone https://github.com/openai/whisper\n", "!git clone https://github.com/ggerganov/whisper.cpp\n", "\n", "# clone HF fine-tuned model (this is just an example)\n", "!git clone https://huggingface.co/xezpeleta/whisper-small-eu-v2" ] }, { "cell_type": "code", "source": [ "# Install required packages\n", "!pip install transformers" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "lncO4nydT0xI", "outputId": "f81184f4-7168-42a5-97df-d29b3ee7ac0c" }, "execution_count": 6, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", "Collecting transformers\n", " Downloading transformers-4.27.4-py3-none-any.whl (6.8 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.8/6.8 MB\u001b[0m \u001b[31m84.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.9/dist-packages (from transformers) (23.0)\n", "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.9/dist-packages (from transformers) (1.22.4)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.9/dist-packages (from transformers) (2.27.1)\n", "Collecting tokenizers!=0.11.3,<0.14,>=0.11.1\n", " Downloading tokenizers-0.13.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.6/7.6 MB\u001b[0m \u001b[31m88.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.9/dist-packages (from transformers) (3.10.7)\n", "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.9/dist-packages (from transformers) (4.65.0)\n", "Collecting huggingface-hub<1.0,>=0.11.0\n", " Downloading huggingface_hub-0.13.3-py3-none-any.whl (199 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m199.8/199.8 KB\u001b[0m \u001b[31m21.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.9/dist-packages (from transformers) (2022.10.31)\n", "Requirement already satisfied: 
, { "cell_type": "code", "source": [ "# Convert the model to ggml\n", "# Arguments: <path to the HF model> <path to the openai/whisper clone> <output dir>\n", "!python3 ./whisper.cpp/models/convert-h5-to-ggml.py ./whisper-small-eu-v2/ ./whisper ." ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "uIkTQr8yTfWP", "outputId": "ce904702-5317-48a5-9f3b-2f0c2ba126ef" }, "execution_count": 7, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "model.encoder.conv1.weight -> encoder.conv1.weight\n", "encoder.conv1.weight 3 (768, 80, 3)\n", "model.encoder.conv1.bias -> encoder.conv1.bias\n", " Reshaped variable: encoder.conv1.bias to shape: (768, 1)\n", "encoder.conv1.bias 2 (768, 1)\n", " Converting to float32\n", "model.encoder.conv2.weight -> encoder.conv2.weight\n", "encoder.conv2.weight 3 (768, 768, 3)\n", "model.encoder.conv2.bias -> encoder.conv2.bias\n", " Reshaped variable: encoder.conv2.bias to shape: (768, 1)\n", "encoder.conv2.bias 2 (768, 1)\n", " Converting to float32\n", "model.encoder.embed_positions.weight -> encoder.positional_embedding\n", "encoder.positional_embedding 2 (1500, 768)\n", " Converting to float32\n", "model.encoder.layers.0.self_attn.k_proj.weight -> encoder.blocks.0.attn.key.weight\n", "encoder.blocks.0.attn.key.weight 2 (768, 768)\n", "model.encoder.layers.0.self_attn.v_proj.weight -> encoder.blocks.0.attn.value.weight\n", "encoder.blocks.0.attn.value.weight 2 (768, 768)\n", "model.encoder.layers.0.self_attn.v_proj.bias -> encoder.blocks.0.attn.value.bias\n", "encoder.blocks.0.attn.value.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.0.self_attn.q_proj.weight -> encoder.blocks.0.attn.query.weight\n", "encoder.blocks.0.attn.query.weight 2 (768, 768)\n", "model.encoder.layers.0.self_attn.q_proj.bias -> encoder.blocks.0.attn.query.bias\n", "encoder.blocks.0.attn.query.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.0.self_attn.out_proj.weight -> encoder.blocks.0.attn.out.weight\n", "encoder.blocks.0.attn.out.weight 2 (768, 768)\n", "model.encoder.layers.0.self_attn.out_proj.bias -> encoder.blocks.0.attn.out.bias\n", "encoder.blocks.0.attn.out.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.0.self_attn_layer_norm.weight -> encoder.blocks.0.attn_ln.weight\n", "encoder.blocks.0.attn_ln.weight 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.0.self_attn_layer_norm.bias -> encoder.blocks.0.attn_ln.bias\n", "encoder.blocks.0.attn_ln.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.0.fc1.weight -> encoder.blocks.0.mlp.0.weight\n", "encoder.blocks.0.mlp.0.weight 2 (3072, 
768)\n", "model.encoder.layers.0.fc1.bias -> encoder.blocks.0.mlp.0.bias\n", "encoder.blocks.0.mlp.0.bias 1 (3072,)\n", " Converting to float32\n", "model.encoder.layers.0.fc2.weight -> encoder.blocks.0.mlp.2.weight\n", "encoder.blocks.0.mlp.2.weight 2 (768, 3072)\n", "model.encoder.layers.0.fc2.bias -> encoder.blocks.0.mlp.2.bias\n", "encoder.blocks.0.mlp.2.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.0.final_layer_norm.weight -> encoder.blocks.0.mlp_ln.weight\n", "encoder.blocks.0.mlp_ln.weight 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.0.final_layer_norm.bias -> encoder.blocks.0.mlp_ln.bias\n", "encoder.blocks.0.mlp_ln.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.1.self_attn.k_proj.weight -> encoder.blocks.1.attn.key.weight\n", "encoder.blocks.1.attn.key.weight 2 (768, 768)\n", "model.encoder.layers.1.self_attn.v_proj.weight -> encoder.blocks.1.attn.value.weight\n", "encoder.blocks.1.attn.value.weight 2 (768, 768)\n", "model.encoder.layers.1.self_attn.v_proj.bias -> encoder.blocks.1.attn.value.bias\n", "encoder.blocks.1.attn.value.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.1.self_attn.q_proj.weight -> encoder.blocks.1.attn.query.weight\n", "encoder.blocks.1.attn.query.weight 2 (768, 768)\n", "model.encoder.layers.1.self_attn.q_proj.bias -> encoder.blocks.1.attn.query.bias\n", "encoder.blocks.1.attn.query.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.1.self_attn.out_proj.weight -> encoder.blocks.1.attn.out.weight\n", "encoder.blocks.1.attn.out.weight 2 (768, 768)\n", "model.encoder.layers.1.self_attn.out_proj.bias -> encoder.blocks.1.attn.out.bias\n", "encoder.blocks.1.attn.out.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.1.self_attn_layer_norm.weight -> encoder.blocks.1.attn_ln.weight\n", "encoder.blocks.1.attn_ln.weight 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.1.self_attn_layer_norm.bias -> encoder.blocks.1.attn_ln.bias\n", "encoder.blocks.1.attn_ln.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.1.fc1.weight -> encoder.blocks.1.mlp.0.weight\n", "encoder.blocks.1.mlp.0.weight 2 (3072, 768)\n", "model.encoder.layers.1.fc1.bias -> encoder.blocks.1.mlp.0.bias\n", "encoder.blocks.1.mlp.0.bias 1 (3072,)\n", " Converting to float32\n", "model.encoder.layers.1.fc2.weight -> encoder.blocks.1.mlp.2.weight\n", "encoder.blocks.1.mlp.2.weight 2 (768, 3072)\n", "model.encoder.layers.1.fc2.bias -> encoder.blocks.1.mlp.2.bias\n", "encoder.blocks.1.mlp.2.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.1.final_layer_norm.weight -> encoder.blocks.1.mlp_ln.weight\n", "encoder.blocks.1.mlp_ln.weight 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.1.final_layer_norm.bias -> encoder.blocks.1.mlp_ln.bias\n", "encoder.blocks.1.mlp_ln.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.2.self_attn.k_proj.weight -> encoder.blocks.2.attn.key.weight\n", "encoder.blocks.2.attn.key.weight 2 (768, 768)\n", "model.encoder.layers.2.self_attn.v_proj.weight -> encoder.blocks.2.attn.value.weight\n", "encoder.blocks.2.attn.value.weight 2 (768, 768)\n", "model.encoder.layers.2.self_attn.v_proj.bias -> encoder.blocks.2.attn.value.bias\n", "encoder.blocks.2.attn.value.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.2.self_attn.q_proj.weight -> encoder.blocks.2.attn.query.weight\n", "encoder.blocks.2.attn.query.weight 2 (768, 768)\n", "model.encoder.layers.2.self_attn.q_proj.bias 
-> encoder.blocks.2.attn.query.bias\n", "encoder.blocks.2.attn.query.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.2.self_attn.out_proj.weight -> encoder.blocks.2.attn.out.weight\n", "encoder.blocks.2.attn.out.weight 2 (768, 768)\n", "model.encoder.layers.2.self_attn.out_proj.bias -> encoder.blocks.2.attn.out.bias\n", "encoder.blocks.2.attn.out.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.2.self_attn_layer_norm.weight -> encoder.blocks.2.attn_ln.weight\n", "encoder.blocks.2.attn_ln.weight 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.2.self_attn_layer_norm.bias -> encoder.blocks.2.attn_ln.bias\n", "encoder.blocks.2.attn_ln.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.2.fc1.weight -> encoder.blocks.2.mlp.0.weight\n", "encoder.blocks.2.mlp.0.weight 2 (3072, 768)\n", "model.encoder.layers.2.fc1.bias -> encoder.blocks.2.mlp.0.bias\n", "encoder.blocks.2.mlp.0.bias 1 (3072,)\n", " Converting to float32\n", "model.encoder.layers.2.fc2.weight -> encoder.blocks.2.mlp.2.weight\n", "encoder.blocks.2.mlp.2.weight 2 (768, 3072)\n", "model.encoder.layers.2.fc2.bias -> encoder.blocks.2.mlp.2.bias\n", "encoder.blocks.2.mlp.2.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.2.final_layer_norm.weight -> encoder.blocks.2.mlp_ln.weight\n", "encoder.blocks.2.mlp_ln.weight 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.2.final_layer_norm.bias -> encoder.blocks.2.mlp_ln.bias\n", "encoder.blocks.2.mlp_ln.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.3.self_attn.k_proj.weight -> encoder.blocks.3.attn.key.weight\n", "encoder.blocks.3.attn.key.weight 2 (768, 768)\n", "model.encoder.layers.3.self_attn.v_proj.weight -> encoder.blocks.3.attn.value.weight\n", "encoder.blocks.3.attn.value.weight 2 (768, 768)\n", "model.encoder.layers.3.self_attn.v_proj.bias -> encoder.blocks.3.attn.value.bias\n", "encoder.blocks.3.attn.value.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.3.self_attn.q_proj.weight -> encoder.blocks.3.attn.query.weight\n", "encoder.blocks.3.attn.query.weight 2 (768, 768)\n", "model.encoder.layers.3.self_attn.q_proj.bias -> encoder.blocks.3.attn.query.bias\n", "encoder.blocks.3.attn.query.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.3.self_attn.out_proj.weight -> encoder.blocks.3.attn.out.weight\n", "encoder.blocks.3.attn.out.weight 2 (768, 768)\n", "model.encoder.layers.3.self_attn.out_proj.bias -> encoder.blocks.3.attn.out.bias\n", "encoder.blocks.3.attn.out.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.3.self_attn_layer_norm.weight -> encoder.blocks.3.attn_ln.weight\n", "encoder.blocks.3.attn_ln.weight 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.3.self_attn_layer_norm.bias -> encoder.blocks.3.attn_ln.bias\n", "encoder.blocks.3.attn_ln.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.3.fc1.weight -> encoder.blocks.3.mlp.0.weight\n", "encoder.blocks.3.mlp.0.weight 2 (3072, 768)\n", "model.encoder.layers.3.fc1.bias -> encoder.blocks.3.mlp.0.bias\n", "encoder.blocks.3.mlp.0.bias 1 (3072,)\n", " Converting to float32\n", "model.encoder.layers.3.fc2.weight -> encoder.blocks.3.mlp.2.weight\n", "encoder.blocks.3.mlp.2.weight 2 (768, 3072)\n", "model.encoder.layers.3.fc2.bias -> encoder.blocks.3.mlp.2.bias\n", "encoder.blocks.3.mlp.2.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.3.final_layer_norm.weight -> encoder.blocks.3.mlp_ln.weight\n", 
"encoder.blocks.3.mlp_ln.weight 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.3.final_layer_norm.bias -> encoder.blocks.3.mlp_ln.bias\n", "encoder.blocks.3.mlp_ln.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.4.self_attn.k_proj.weight -> encoder.blocks.4.attn.key.weight\n", "encoder.blocks.4.attn.key.weight 2 (768, 768)\n", "model.encoder.layers.4.self_attn.v_proj.weight -> encoder.blocks.4.attn.value.weight\n", "encoder.blocks.4.attn.value.weight 2 (768, 768)\n", "model.encoder.layers.4.self_attn.v_proj.bias -> encoder.blocks.4.attn.value.bias\n", "encoder.blocks.4.attn.value.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.4.self_attn.q_proj.weight -> encoder.blocks.4.attn.query.weight\n", "encoder.blocks.4.attn.query.weight 2 (768, 768)\n", "model.encoder.layers.4.self_attn.q_proj.bias -> encoder.blocks.4.attn.query.bias\n", "encoder.blocks.4.attn.query.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.4.self_attn.out_proj.weight -> encoder.blocks.4.attn.out.weight\n", "encoder.blocks.4.attn.out.weight 2 (768, 768)\n", "model.encoder.layers.4.self_attn.out_proj.bias -> encoder.blocks.4.attn.out.bias\n", "encoder.blocks.4.attn.out.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.4.self_attn_layer_norm.weight -> encoder.blocks.4.attn_ln.weight\n", "encoder.blocks.4.attn_ln.weight 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.4.self_attn_layer_norm.bias -> encoder.blocks.4.attn_ln.bias\n", "encoder.blocks.4.attn_ln.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.4.fc1.weight -> encoder.blocks.4.mlp.0.weight\n", "encoder.blocks.4.mlp.0.weight 2 (3072, 768)\n", "model.encoder.layers.4.fc1.bias -> encoder.blocks.4.mlp.0.bias\n", "encoder.blocks.4.mlp.0.bias 1 (3072,)\n", " Converting to float32\n", "model.encoder.layers.4.fc2.weight -> encoder.blocks.4.mlp.2.weight\n", "encoder.blocks.4.mlp.2.weight 2 (768, 3072)\n", "model.encoder.layers.4.fc2.bias -> encoder.blocks.4.mlp.2.bias\n", "encoder.blocks.4.mlp.2.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.4.final_layer_norm.weight -> encoder.blocks.4.mlp_ln.weight\n", "encoder.blocks.4.mlp_ln.weight 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.4.final_layer_norm.bias -> encoder.blocks.4.mlp_ln.bias\n", "encoder.blocks.4.mlp_ln.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.5.self_attn.k_proj.weight -> encoder.blocks.5.attn.key.weight\n", "encoder.blocks.5.attn.key.weight 2 (768, 768)\n", "model.encoder.layers.5.self_attn.v_proj.weight -> encoder.blocks.5.attn.value.weight\n", "encoder.blocks.5.attn.value.weight 2 (768, 768)\n", "model.encoder.layers.5.self_attn.v_proj.bias -> encoder.blocks.5.attn.value.bias\n", "encoder.blocks.5.attn.value.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.5.self_attn.q_proj.weight -> encoder.blocks.5.attn.query.weight\n", "encoder.blocks.5.attn.query.weight 2 (768, 768)\n", "model.encoder.layers.5.self_attn.q_proj.bias -> encoder.blocks.5.attn.query.bias\n", "encoder.blocks.5.attn.query.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.5.self_attn.out_proj.weight -> encoder.blocks.5.attn.out.weight\n", "encoder.blocks.5.attn.out.weight 2 (768, 768)\n", "model.encoder.layers.5.self_attn.out_proj.bias -> encoder.blocks.5.attn.out.bias\n", "encoder.blocks.5.attn.out.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.5.self_attn_layer_norm.weight -> 
encoder.blocks.5.attn_ln.weight\n", "encoder.blocks.5.attn_ln.weight 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.5.self_attn_layer_norm.bias -> encoder.blocks.5.attn_ln.bias\n", "encoder.blocks.5.attn_ln.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.5.fc1.weight -> encoder.blocks.5.mlp.0.weight\n", "encoder.blocks.5.mlp.0.weight 2 (3072, 768)\n", "model.encoder.layers.5.fc1.bias -> encoder.blocks.5.mlp.0.bias\n", "encoder.blocks.5.mlp.0.bias 1 (3072,)\n", " Converting to float32\n", "model.encoder.layers.5.fc2.weight -> encoder.blocks.5.mlp.2.weight\n", "encoder.blocks.5.mlp.2.weight 2 (768, 3072)\n", "model.encoder.layers.5.fc2.bias -> encoder.blocks.5.mlp.2.bias\n", "encoder.blocks.5.mlp.2.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.5.final_layer_norm.weight -> encoder.blocks.5.mlp_ln.weight\n", "encoder.blocks.5.mlp_ln.weight 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.5.final_layer_norm.bias -> encoder.blocks.5.mlp_ln.bias\n", "encoder.blocks.5.mlp_ln.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.6.self_attn.k_proj.weight -> encoder.blocks.6.attn.key.weight\n", "encoder.blocks.6.attn.key.weight 2 (768, 768)\n", "model.encoder.layers.6.self_attn.v_proj.weight -> encoder.blocks.6.attn.value.weight\n", "encoder.blocks.6.attn.value.weight 2 (768, 768)\n", "model.encoder.layers.6.self_attn.v_proj.bias -> encoder.blocks.6.attn.value.bias\n", "encoder.blocks.6.attn.value.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.6.self_attn.q_proj.weight -> encoder.blocks.6.attn.query.weight\n", "encoder.blocks.6.attn.query.weight 2 (768, 768)\n", "model.encoder.layers.6.self_attn.q_proj.bias -> encoder.blocks.6.attn.query.bias\n", "encoder.blocks.6.attn.query.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.6.self_attn.out_proj.weight -> encoder.blocks.6.attn.out.weight\n", "encoder.blocks.6.attn.out.weight 2 (768, 768)\n", "model.encoder.layers.6.self_attn.out_proj.bias -> encoder.blocks.6.attn.out.bias\n", "encoder.blocks.6.attn.out.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.6.self_attn_layer_norm.weight -> encoder.blocks.6.attn_ln.weight\n", "encoder.blocks.6.attn_ln.weight 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.6.self_attn_layer_norm.bias -> encoder.blocks.6.attn_ln.bias\n", "encoder.blocks.6.attn_ln.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.6.fc1.weight -> encoder.blocks.6.mlp.0.weight\n", "encoder.blocks.6.mlp.0.weight 2 (3072, 768)\n", "model.encoder.layers.6.fc1.bias -> encoder.blocks.6.mlp.0.bias\n", "encoder.blocks.6.mlp.0.bias 1 (3072,)\n", " Converting to float32\n", "model.encoder.layers.6.fc2.weight -> encoder.blocks.6.mlp.2.weight\n", "encoder.blocks.6.mlp.2.weight 2 (768, 3072)\n", "model.encoder.layers.6.fc2.bias -> encoder.blocks.6.mlp.2.bias\n", "encoder.blocks.6.mlp.2.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.6.final_layer_norm.weight -> encoder.blocks.6.mlp_ln.weight\n", "encoder.blocks.6.mlp_ln.weight 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.6.final_layer_norm.bias -> encoder.blocks.6.mlp_ln.bias\n", "encoder.blocks.6.mlp_ln.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.7.self_attn.k_proj.weight -> encoder.blocks.7.attn.key.weight\n", "encoder.blocks.7.attn.key.weight 2 (768, 768)\n", "model.encoder.layers.7.self_attn.v_proj.weight -> encoder.blocks.7.attn.value.weight\n", 
"encoder.blocks.7.attn.value.weight 2 (768, 768)\n", "model.encoder.layers.7.self_attn.v_proj.bias -> encoder.blocks.7.attn.value.bias\n", "encoder.blocks.7.attn.value.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.7.self_attn.q_proj.weight -> encoder.blocks.7.attn.query.weight\n", "encoder.blocks.7.attn.query.weight 2 (768, 768)\n", "model.encoder.layers.7.self_attn.q_proj.bias -> encoder.blocks.7.attn.query.bias\n", "encoder.blocks.7.attn.query.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.7.self_attn.out_proj.weight -> encoder.blocks.7.attn.out.weight\n", "encoder.blocks.7.attn.out.weight 2 (768, 768)\n", "model.encoder.layers.7.self_attn.out_proj.bias -> encoder.blocks.7.attn.out.bias\n", "encoder.blocks.7.attn.out.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.7.self_attn_layer_norm.weight -> encoder.blocks.7.attn_ln.weight\n", "encoder.blocks.7.attn_ln.weight 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.7.self_attn_layer_norm.bias -> encoder.blocks.7.attn_ln.bias\n", "encoder.blocks.7.attn_ln.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.7.fc1.weight -> encoder.blocks.7.mlp.0.weight\n", "encoder.blocks.7.mlp.0.weight 2 (3072, 768)\n", "model.encoder.layers.7.fc1.bias -> encoder.blocks.7.mlp.0.bias\n", "encoder.blocks.7.mlp.0.bias 1 (3072,)\n", " Converting to float32\n", "model.encoder.layers.7.fc2.weight -> encoder.blocks.7.mlp.2.weight\n", "encoder.blocks.7.mlp.2.weight 2 (768, 3072)\n", "model.encoder.layers.7.fc2.bias -> encoder.blocks.7.mlp.2.bias\n", "encoder.blocks.7.mlp.2.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.7.final_layer_norm.weight -> encoder.blocks.7.mlp_ln.weight\n", "encoder.blocks.7.mlp_ln.weight 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.7.final_layer_norm.bias -> encoder.blocks.7.mlp_ln.bias\n", "encoder.blocks.7.mlp_ln.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.8.self_attn.k_proj.weight -> encoder.blocks.8.attn.key.weight\n", "encoder.blocks.8.attn.key.weight 2 (768, 768)\n", "model.encoder.layers.8.self_attn.v_proj.weight -> encoder.blocks.8.attn.value.weight\n", "encoder.blocks.8.attn.value.weight 2 (768, 768)\n", "model.encoder.layers.8.self_attn.v_proj.bias -> encoder.blocks.8.attn.value.bias\n", "encoder.blocks.8.attn.value.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.8.self_attn.q_proj.weight -> encoder.blocks.8.attn.query.weight\n", "encoder.blocks.8.attn.query.weight 2 (768, 768)\n", "model.encoder.layers.8.self_attn.q_proj.bias -> encoder.blocks.8.attn.query.bias\n", "encoder.blocks.8.attn.query.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.8.self_attn.out_proj.weight -> encoder.blocks.8.attn.out.weight\n", "encoder.blocks.8.attn.out.weight 2 (768, 768)\n", "model.encoder.layers.8.self_attn.out_proj.bias -> encoder.blocks.8.attn.out.bias\n", "encoder.blocks.8.attn.out.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.8.self_attn_layer_norm.weight -> encoder.blocks.8.attn_ln.weight\n", "encoder.blocks.8.attn_ln.weight 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.8.self_attn_layer_norm.bias -> encoder.blocks.8.attn_ln.bias\n", "encoder.blocks.8.attn_ln.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.8.fc1.weight -> encoder.blocks.8.mlp.0.weight\n", "encoder.blocks.8.mlp.0.weight 2 (3072, 768)\n", "model.encoder.layers.8.fc1.bias -> encoder.blocks.8.mlp.0.bias\n", 
"encoder.blocks.8.mlp.0.bias 1 (3072,)\n", " Converting to float32\n", "model.encoder.layers.8.fc2.weight -> encoder.blocks.8.mlp.2.weight\n", "encoder.blocks.8.mlp.2.weight 2 (768, 3072)\n", "model.encoder.layers.8.fc2.bias -> encoder.blocks.8.mlp.2.bias\n", "encoder.blocks.8.mlp.2.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.8.final_layer_norm.weight -> encoder.blocks.8.mlp_ln.weight\n", "encoder.blocks.8.mlp_ln.weight 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.8.final_layer_norm.bias -> encoder.blocks.8.mlp_ln.bias\n", "encoder.blocks.8.mlp_ln.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.9.self_attn.k_proj.weight -> encoder.blocks.9.attn.key.weight\n", "encoder.blocks.9.attn.key.weight 2 (768, 768)\n", "model.encoder.layers.9.self_attn.v_proj.weight -> encoder.blocks.9.attn.value.weight\n", "encoder.blocks.9.attn.value.weight 2 (768, 768)\n", "model.encoder.layers.9.self_attn.v_proj.bias -> encoder.blocks.9.attn.value.bias\n", "encoder.blocks.9.attn.value.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.9.self_attn.q_proj.weight -> encoder.blocks.9.attn.query.weight\n", "encoder.blocks.9.attn.query.weight 2 (768, 768)\n", "model.encoder.layers.9.self_attn.q_proj.bias -> encoder.blocks.9.attn.query.bias\n", "encoder.blocks.9.attn.query.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.9.self_attn.out_proj.weight -> encoder.blocks.9.attn.out.weight\n", "encoder.blocks.9.attn.out.weight 2 (768, 768)\n", "model.encoder.layers.9.self_attn.out_proj.bias -> encoder.blocks.9.attn.out.bias\n", "encoder.blocks.9.attn.out.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.9.self_attn_layer_norm.weight -> encoder.blocks.9.attn_ln.weight\n", "encoder.blocks.9.attn_ln.weight 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.9.self_attn_layer_norm.bias -> encoder.blocks.9.attn_ln.bias\n", "encoder.blocks.9.attn_ln.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.9.fc1.weight -> encoder.blocks.9.mlp.0.weight\n", "encoder.blocks.9.mlp.0.weight 2 (3072, 768)\n", "model.encoder.layers.9.fc1.bias -> encoder.blocks.9.mlp.0.bias\n", "encoder.blocks.9.mlp.0.bias 1 (3072,)\n", " Converting to float32\n", "model.encoder.layers.9.fc2.weight -> encoder.blocks.9.mlp.2.weight\n", "encoder.blocks.9.mlp.2.weight 2 (768, 3072)\n", "model.encoder.layers.9.fc2.bias -> encoder.blocks.9.mlp.2.bias\n", "encoder.blocks.9.mlp.2.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.9.final_layer_norm.weight -> encoder.blocks.9.mlp_ln.weight\n", "encoder.blocks.9.mlp_ln.weight 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.9.final_layer_norm.bias -> encoder.blocks.9.mlp_ln.bias\n", "encoder.blocks.9.mlp_ln.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.10.self_attn.k_proj.weight -> encoder.blocks.10.attn.key.weight\n", "encoder.blocks.10.attn.key.weight 2 (768, 768)\n", "model.encoder.layers.10.self_attn.v_proj.weight -> encoder.blocks.10.attn.value.weight\n", "encoder.blocks.10.attn.value.weight 2 (768, 768)\n", "model.encoder.layers.10.self_attn.v_proj.bias -> encoder.blocks.10.attn.value.bias\n", "encoder.blocks.10.attn.value.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.10.self_attn.q_proj.weight -> encoder.blocks.10.attn.query.weight\n", "encoder.blocks.10.attn.query.weight 2 (768, 768)\n", "model.encoder.layers.10.self_attn.q_proj.bias -> encoder.blocks.10.attn.query.bias\n", 
"encoder.blocks.10.attn.query.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.10.self_attn.out_proj.weight -> encoder.blocks.10.attn.out.weight\n", "encoder.blocks.10.attn.out.weight 2 (768, 768)\n", "model.encoder.layers.10.self_attn.out_proj.bias -> encoder.blocks.10.attn.out.bias\n", "encoder.blocks.10.attn.out.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.10.self_attn_layer_norm.weight -> encoder.blocks.10.attn_ln.weight\n", "encoder.blocks.10.attn_ln.weight 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.10.self_attn_layer_norm.bias -> encoder.blocks.10.attn_ln.bias\n", "encoder.blocks.10.attn_ln.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.10.fc1.weight -> encoder.blocks.10.mlp.0.weight\n", "encoder.blocks.10.mlp.0.weight 2 (3072, 768)\n", "model.encoder.layers.10.fc1.bias -> encoder.blocks.10.mlp.0.bias\n", "encoder.blocks.10.mlp.0.bias 1 (3072,)\n", " Converting to float32\n", "model.encoder.layers.10.fc2.weight -> encoder.blocks.10.mlp.2.weight\n", "encoder.blocks.10.mlp.2.weight 2 (768, 3072)\n", "model.encoder.layers.10.fc2.bias -> encoder.blocks.10.mlp.2.bias\n", "encoder.blocks.10.mlp.2.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.10.final_layer_norm.weight -> encoder.blocks.10.mlp_ln.weight\n", "encoder.blocks.10.mlp_ln.weight 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.10.final_layer_norm.bias -> encoder.blocks.10.mlp_ln.bias\n", "encoder.blocks.10.mlp_ln.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.11.self_attn.k_proj.weight -> encoder.blocks.11.attn.key.weight\n", "encoder.blocks.11.attn.key.weight 2 (768, 768)\n", "model.encoder.layers.11.self_attn.v_proj.weight -> encoder.blocks.11.attn.value.weight\n", "encoder.blocks.11.attn.value.weight 2 (768, 768)\n", "model.encoder.layers.11.self_attn.v_proj.bias -> encoder.blocks.11.attn.value.bias\n", "encoder.blocks.11.attn.value.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.11.self_attn.q_proj.weight -> encoder.blocks.11.attn.query.weight\n", "encoder.blocks.11.attn.query.weight 2 (768, 768)\n", "model.encoder.layers.11.self_attn.q_proj.bias -> encoder.blocks.11.attn.query.bias\n", "encoder.blocks.11.attn.query.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.11.self_attn.out_proj.weight -> encoder.blocks.11.attn.out.weight\n", "encoder.blocks.11.attn.out.weight 2 (768, 768)\n", "model.encoder.layers.11.self_attn.out_proj.bias -> encoder.blocks.11.attn.out.bias\n", "encoder.blocks.11.attn.out.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.11.self_attn_layer_norm.weight -> encoder.blocks.11.attn_ln.weight\n", "encoder.blocks.11.attn_ln.weight 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.11.self_attn_layer_norm.bias -> encoder.blocks.11.attn_ln.bias\n", "encoder.blocks.11.attn_ln.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.11.fc1.weight -> encoder.blocks.11.mlp.0.weight\n", "encoder.blocks.11.mlp.0.weight 2 (3072, 768)\n", "model.encoder.layers.11.fc1.bias -> encoder.blocks.11.mlp.0.bias\n", "encoder.blocks.11.mlp.0.bias 1 (3072,)\n", " Converting to float32\n", "model.encoder.layers.11.fc2.weight -> encoder.blocks.11.mlp.2.weight\n", "encoder.blocks.11.mlp.2.weight 2 (768, 3072)\n", "model.encoder.layers.11.fc2.bias -> encoder.blocks.11.mlp.2.bias\n", "encoder.blocks.11.mlp.2.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.11.final_layer_norm.weight -> 
encoder.blocks.11.mlp_ln.weight\n", "encoder.blocks.11.mlp_ln.weight 1 (768,)\n", " Converting to float32\n", "model.encoder.layers.11.final_layer_norm.bias -> encoder.blocks.11.mlp_ln.bias\n", "encoder.blocks.11.mlp_ln.bias 1 (768,)\n", " Converting to float32\n", "model.encoder.layer_norm.weight -> encoder.ln_post.weight\n", "encoder.ln_post.weight 1 (768,)\n", " Converting to float32\n", "model.encoder.layer_norm.bias -> encoder.ln_post.bias\n", "encoder.ln_post.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.embed_tokens.weight -> decoder.token_embedding.weight\n", "decoder.token_embedding.weight 2 (51865, 768)\n", "model.decoder.embed_positions.weight -> decoder.positional_embedding\n", "decoder.positional_embedding 2 (448, 768)\n", " Converting to float32\n", "model.decoder.layers.0.self_attn.k_proj.weight -> decoder.blocks.0.attn.key.weight\n", "decoder.blocks.0.attn.key.weight 2 (768, 768)\n", "model.decoder.layers.0.self_attn.v_proj.weight -> decoder.blocks.0.attn.value.weight\n", "decoder.blocks.0.attn.value.weight 2 (768, 768)\n", "model.decoder.layers.0.self_attn.v_proj.bias -> decoder.blocks.0.attn.value.bias\n", "decoder.blocks.0.attn.value.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.0.self_attn.q_proj.weight -> decoder.blocks.0.attn.query.weight\n", "decoder.blocks.0.attn.query.weight 2 (768, 768)\n", "model.decoder.layers.0.self_attn.q_proj.bias -> decoder.blocks.0.attn.query.bias\n", "decoder.blocks.0.attn.query.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.0.self_attn.out_proj.weight -> decoder.blocks.0.attn.out.weight\n", "decoder.blocks.0.attn.out.weight 2 (768, 768)\n", "model.decoder.layers.0.self_attn.out_proj.bias -> decoder.blocks.0.attn.out.bias\n", "decoder.blocks.0.attn.out.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.0.self_attn_layer_norm.weight -> decoder.blocks.0.attn_ln.weight\n", "decoder.blocks.0.attn_ln.weight 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.0.self_attn_layer_norm.bias -> decoder.blocks.0.attn_ln.bias\n", "decoder.blocks.0.attn_ln.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.0.encoder_attn.k_proj.weight -> decoder.blocks.0.cross_attn.key.weight\n", "decoder.blocks.0.cross_attn.key.weight 2 (768, 768)\n", "model.decoder.layers.0.encoder_attn.v_proj.weight -> decoder.blocks.0.cross_attn.value.weight\n", "decoder.blocks.0.cross_attn.value.weight 2 (768, 768)\n", "model.decoder.layers.0.encoder_attn.v_proj.bias -> decoder.blocks.0.cross_attn.value.bias\n", "decoder.blocks.0.cross_attn.value.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.0.encoder_attn.q_proj.weight -> decoder.blocks.0.cross_attn.query.weight\n", "decoder.blocks.0.cross_attn.query.weight 2 (768, 768)\n", "model.decoder.layers.0.encoder_attn.q_proj.bias -> decoder.blocks.0.cross_attn.query.bias\n", "decoder.blocks.0.cross_attn.query.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.0.encoder_attn.out_proj.weight -> decoder.blocks.0.cross_attn.out.weight\n", "decoder.blocks.0.cross_attn.out.weight 2 (768, 768)\n", "model.decoder.layers.0.encoder_attn.out_proj.bias -> decoder.blocks.0.cross_attn.out.bias\n", "decoder.blocks.0.cross_attn.out.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.0.encoder_attn_layer_norm.weight -> decoder.blocks.0.cross_attn_ln.weight\n", "decoder.blocks.0.cross_attn_ln.weight 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.0.encoder_attn_layer_norm.bias -> 
decoder.blocks.0.cross_attn_ln.bias\n", "decoder.blocks.0.cross_attn_ln.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.0.fc1.weight -> decoder.blocks.0.mlp.0.weight\n", "decoder.blocks.0.mlp.0.weight 2 (3072, 768)\n", "model.decoder.layers.0.fc1.bias -> decoder.blocks.0.mlp.0.bias\n", "decoder.blocks.0.mlp.0.bias 1 (3072,)\n", " Converting to float32\n", "model.decoder.layers.0.fc2.weight -> decoder.blocks.0.mlp.2.weight\n", "decoder.blocks.0.mlp.2.weight 2 (768, 3072)\n", "model.decoder.layers.0.fc2.bias -> decoder.blocks.0.mlp.2.bias\n", "decoder.blocks.0.mlp.2.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.0.final_layer_norm.weight -> decoder.blocks.0.mlp_ln.weight\n", "decoder.blocks.0.mlp_ln.weight 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.0.final_layer_norm.bias -> decoder.blocks.0.mlp_ln.bias\n", "decoder.blocks.0.mlp_ln.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.1.self_attn.k_proj.weight -> decoder.blocks.1.attn.key.weight\n", "decoder.blocks.1.attn.key.weight 2 (768, 768)\n", "model.decoder.layers.1.self_attn.v_proj.weight -> decoder.blocks.1.attn.value.weight\n", "decoder.blocks.1.attn.value.weight 2 (768, 768)\n", "model.decoder.layers.1.self_attn.v_proj.bias -> decoder.blocks.1.attn.value.bias\n", "decoder.blocks.1.attn.value.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.1.self_attn.q_proj.weight -> decoder.blocks.1.attn.query.weight\n", "decoder.blocks.1.attn.query.weight 2 (768, 768)\n", "model.decoder.layers.1.self_attn.q_proj.bias -> decoder.blocks.1.attn.query.bias\n", "decoder.blocks.1.attn.query.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.1.self_attn.out_proj.weight -> decoder.blocks.1.attn.out.weight\n", "decoder.blocks.1.attn.out.weight 2 (768, 768)\n", "model.decoder.layers.1.self_attn.out_proj.bias -> decoder.blocks.1.attn.out.bias\n", "decoder.blocks.1.attn.out.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.1.self_attn_layer_norm.weight -> decoder.blocks.1.attn_ln.weight\n", "decoder.blocks.1.attn_ln.weight 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.1.self_attn_layer_norm.bias -> decoder.blocks.1.attn_ln.bias\n", "decoder.blocks.1.attn_ln.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.1.encoder_attn.k_proj.weight -> decoder.blocks.1.cross_attn.key.weight\n", "decoder.blocks.1.cross_attn.key.weight 2 (768, 768)\n", "model.decoder.layers.1.encoder_attn.v_proj.weight -> decoder.blocks.1.cross_attn.value.weight\n", "decoder.blocks.1.cross_attn.value.weight 2 (768, 768)\n", "model.decoder.layers.1.encoder_attn.v_proj.bias -> decoder.blocks.1.cross_attn.value.bias\n", "decoder.blocks.1.cross_attn.value.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.1.encoder_attn.q_proj.weight -> decoder.blocks.1.cross_attn.query.weight\n", "decoder.blocks.1.cross_attn.query.weight 2 (768, 768)\n", "model.decoder.layers.1.encoder_attn.q_proj.bias -> decoder.blocks.1.cross_attn.query.bias\n", "decoder.blocks.1.cross_attn.query.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.1.encoder_attn.out_proj.weight -> decoder.blocks.1.cross_attn.out.weight\n", "decoder.blocks.1.cross_attn.out.weight 2 (768, 768)\n", "model.decoder.layers.1.encoder_attn.out_proj.bias -> decoder.blocks.1.cross_attn.out.bias\n", "decoder.blocks.1.cross_attn.out.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.1.encoder_attn_layer_norm.weight -> 
decoder.blocks.1.cross_attn_ln.weight\n", "decoder.blocks.1.cross_attn_ln.weight 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.1.encoder_attn_layer_norm.bias -> decoder.blocks.1.cross_attn_ln.bias\n", "decoder.blocks.1.cross_attn_ln.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.1.fc1.weight -> decoder.blocks.1.mlp.0.weight\n", "decoder.blocks.1.mlp.0.weight 2 (3072, 768)\n", "model.decoder.layers.1.fc1.bias -> decoder.blocks.1.mlp.0.bias\n", "decoder.blocks.1.mlp.0.bias 1 (3072,)\n", " Converting to float32\n", "model.decoder.layers.1.fc2.weight -> decoder.blocks.1.mlp.2.weight\n", "decoder.blocks.1.mlp.2.weight 2 (768, 3072)\n", "model.decoder.layers.1.fc2.bias -> decoder.blocks.1.mlp.2.bias\n", "decoder.blocks.1.mlp.2.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.1.final_layer_norm.weight -> decoder.blocks.1.mlp_ln.weight\n", "decoder.blocks.1.mlp_ln.weight 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.1.final_layer_norm.bias -> decoder.blocks.1.mlp_ln.bias\n", "decoder.blocks.1.mlp_ln.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.2.self_attn.k_proj.weight -> decoder.blocks.2.attn.key.weight\n", "decoder.blocks.2.attn.key.weight 2 (768, 768)\n", "model.decoder.layers.2.self_attn.v_proj.weight -> decoder.blocks.2.attn.value.weight\n", "decoder.blocks.2.attn.value.weight 2 (768, 768)\n", "model.decoder.layers.2.self_attn.v_proj.bias -> decoder.blocks.2.attn.value.bias\n", "decoder.blocks.2.attn.value.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.2.self_attn.q_proj.weight -> decoder.blocks.2.attn.query.weight\n", "decoder.blocks.2.attn.query.weight 2 (768, 768)\n", "model.decoder.layers.2.self_attn.q_proj.bias -> decoder.blocks.2.attn.query.bias\n", "decoder.blocks.2.attn.query.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.2.self_attn.out_proj.weight -> decoder.blocks.2.attn.out.weight\n", "decoder.blocks.2.attn.out.weight 2 (768, 768)\n", "model.decoder.layers.2.self_attn.out_proj.bias -> decoder.blocks.2.attn.out.bias\n", "decoder.blocks.2.attn.out.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.2.self_attn_layer_norm.weight -> decoder.blocks.2.attn_ln.weight\n", "decoder.blocks.2.attn_ln.weight 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.2.self_attn_layer_norm.bias -> decoder.blocks.2.attn_ln.bias\n", "decoder.blocks.2.attn_ln.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.2.encoder_attn.k_proj.weight -> decoder.blocks.2.cross_attn.key.weight\n", "decoder.blocks.2.cross_attn.key.weight 2 (768, 768)\n", "model.decoder.layers.2.encoder_attn.v_proj.weight -> decoder.blocks.2.cross_attn.value.weight\n", "decoder.blocks.2.cross_attn.value.weight 2 (768, 768)\n", "model.decoder.layers.2.encoder_attn.v_proj.bias -> decoder.blocks.2.cross_attn.value.bias\n", "decoder.blocks.2.cross_attn.value.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.2.encoder_attn.q_proj.weight -> decoder.blocks.2.cross_attn.query.weight\n", "decoder.blocks.2.cross_attn.query.weight 2 (768, 768)\n", "model.decoder.layers.2.encoder_attn.q_proj.bias -> decoder.blocks.2.cross_attn.query.bias\n", "decoder.blocks.2.cross_attn.query.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.2.encoder_attn.out_proj.weight -> decoder.blocks.2.cross_attn.out.weight\n", "decoder.blocks.2.cross_attn.out.weight 2 (768, 768)\n", "model.decoder.layers.2.encoder_attn.out_proj.bias -> 
decoder.blocks.2.cross_attn.out.bias\n", "decoder.blocks.2.cross_attn.out.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.2.encoder_attn_layer_norm.weight -> decoder.blocks.2.cross_attn_ln.weight\n", "decoder.blocks.2.cross_attn_ln.weight 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.2.encoder_attn_layer_norm.bias -> decoder.blocks.2.cross_attn_ln.bias\n", "decoder.blocks.2.cross_attn_ln.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.2.fc1.weight -> decoder.blocks.2.mlp.0.weight\n", "decoder.blocks.2.mlp.0.weight 2 (3072, 768)\n", "model.decoder.layers.2.fc1.bias -> decoder.blocks.2.mlp.0.bias\n", "decoder.blocks.2.mlp.0.bias 1 (3072,)\n", " Converting to float32\n", "model.decoder.layers.2.fc2.weight -> decoder.blocks.2.mlp.2.weight\n", "decoder.blocks.2.mlp.2.weight 2 (768, 3072)\n", "model.decoder.layers.2.fc2.bias -> decoder.blocks.2.mlp.2.bias\n", "decoder.blocks.2.mlp.2.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.2.final_layer_norm.weight -> decoder.blocks.2.mlp_ln.weight\n", "decoder.blocks.2.mlp_ln.weight 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.2.final_layer_norm.bias -> decoder.blocks.2.mlp_ln.bias\n", "decoder.blocks.2.mlp_ln.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.3.self_attn.k_proj.weight -> decoder.blocks.3.attn.key.weight\n", "decoder.blocks.3.attn.key.weight 2 (768, 768)\n", "model.decoder.layers.3.self_attn.v_proj.weight -> decoder.blocks.3.attn.value.weight\n", "decoder.blocks.3.attn.value.weight 2 (768, 768)\n", "model.decoder.layers.3.self_attn.v_proj.bias -> decoder.blocks.3.attn.value.bias\n", "decoder.blocks.3.attn.value.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.3.self_attn.q_proj.weight -> decoder.blocks.3.attn.query.weight\n", "decoder.blocks.3.attn.query.weight 2 (768, 768)\n", "model.decoder.layers.3.self_attn.q_proj.bias -> decoder.blocks.3.attn.query.bias\n", "decoder.blocks.3.attn.query.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.3.self_attn.out_proj.weight -> decoder.blocks.3.attn.out.weight\n", "decoder.blocks.3.attn.out.weight 2 (768, 768)\n", "model.decoder.layers.3.self_attn.out_proj.bias -> decoder.blocks.3.attn.out.bias\n", "decoder.blocks.3.attn.out.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.3.self_attn_layer_norm.weight -> decoder.blocks.3.attn_ln.weight\n", "decoder.blocks.3.attn_ln.weight 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.3.self_attn_layer_norm.bias -> decoder.blocks.3.attn_ln.bias\n", "decoder.blocks.3.attn_ln.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.3.encoder_attn.k_proj.weight -> decoder.blocks.3.cross_attn.key.weight\n", "decoder.blocks.3.cross_attn.key.weight 2 (768, 768)\n", "model.decoder.layers.3.encoder_attn.v_proj.weight -> decoder.blocks.3.cross_attn.value.weight\n", "decoder.blocks.3.cross_attn.value.weight 2 (768, 768)\n", "model.decoder.layers.3.encoder_attn.v_proj.bias -> decoder.blocks.3.cross_attn.value.bias\n", "decoder.blocks.3.cross_attn.value.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.3.encoder_attn.q_proj.weight -> decoder.blocks.3.cross_attn.query.weight\n", "decoder.blocks.3.cross_attn.query.weight 2 (768, 768)\n", "model.decoder.layers.3.encoder_attn.q_proj.bias -> decoder.blocks.3.cross_attn.query.bias\n", "decoder.blocks.3.cross_attn.query.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.3.encoder_attn.out_proj.weight -> 
decoder.blocks.3.cross_attn.out.weight\n", "decoder.blocks.3.cross_attn.out.weight 2 (768, 768)\n", "model.decoder.layers.3.encoder_attn.out_proj.bias -> decoder.blocks.3.cross_attn.out.bias\n", "decoder.blocks.3.cross_attn.out.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.3.encoder_attn_layer_norm.weight -> decoder.blocks.3.cross_attn_ln.weight\n", "decoder.blocks.3.cross_attn_ln.weight 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.3.encoder_attn_layer_norm.bias -> decoder.blocks.3.cross_attn_ln.bias\n", "decoder.blocks.3.cross_attn_ln.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.3.fc1.weight -> decoder.blocks.3.mlp.0.weight\n", "decoder.blocks.3.mlp.0.weight 2 (3072, 768)\n", "model.decoder.layers.3.fc1.bias -> decoder.blocks.3.mlp.0.bias\n", "decoder.blocks.3.mlp.0.bias 1 (3072,)\n", " Converting to float32\n", "model.decoder.layers.3.fc2.weight -> decoder.blocks.3.mlp.2.weight\n", "decoder.blocks.3.mlp.2.weight 2 (768, 3072)\n", "model.decoder.layers.3.fc2.bias -> decoder.blocks.3.mlp.2.bias\n", "decoder.blocks.3.mlp.2.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.3.final_layer_norm.weight -> decoder.blocks.3.mlp_ln.weight\n", "decoder.blocks.3.mlp_ln.weight 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.3.final_layer_norm.bias -> decoder.blocks.3.mlp_ln.bias\n", "decoder.blocks.3.mlp_ln.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.4.self_attn.k_proj.weight -> decoder.blocks.4.attn.key.weight\n", "decoder.blocks.4.attn.key.weight 2 (768, 768)\n", "model.decoder.layers.4.self_attn.v_proj.weight -> decoder.blocks.4.attn.value.weight\n", "decoder.blocks.4.attn.value.weight 2 (768, 768)\n", "model.decoder.layers.4.self_attn.v_proj.bias -> decoder.blocks.4.attn.value.bias\n", "decoder.blocks.4.attn.value.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.4.self_attn.q_proj.weight -> decoder.blocks.4.attn.query.weight\n", "decoder.blocks.4.attn.query.weight 2 (768, 768)\n", "model.decoder.layers.4.self_attn.q_proj.bias -> decoder.blocks.4.attn.query.bias\n", "decoder.blocks.4.attn.query.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.4.self_attn.out_proj.weight -> decoder.blocks.4.attn.out.weight\n", "decoder.blocks.4.attn.out.weight 2 (768, 768)\n", "model.decoder.layers.4.self_attn.out_proj.bias -> decoder.blocks.4.attn.out.bias\n", "decoder.blocks.4.attn.out.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.4.self_attn_layer_norm.weight -> decoder.blocks.4.attn_ln.weight\n", "decoder.blocks.4.attn_ln.weight 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.4.self_attn_layer_norm.bias -> decoder.blocks.4.attn_ln.bias\n", "decoder.blocks.4.attn_ln.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.4.encoder_attn.k_proj.weight -> decoder.blocks.4.cross_attn.key.weight\n", "decoder.blocks.4.cross_attn.key.weight 2 (768, 768)\n", "model.decoder.layers.4.encoder_attn.v_proj.weight -> decoder.blocks.4.cross_attn.value.weight\n", "decoder.blocks.4.cross_attn.value.weight 2 (768, 768)\n", "model.decoder.layers.4.encoder_attn.v_proj.bias -> decoder.blocks.4.cross_attn.value.bias\n", "decoder.blocks.4.cross_attn.value.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.4.encoder_attn.q_proj.weight -> decoder.blocks.4.cross_attn.query.weight\n", "decoder.blocks.4.cross_attn.query.weight 2 (768, 768)\n", "model.decoder.layers.4.encoder_attn.q_proj.bias -> 
decoder.blocks.4.cross_attn.query.bias\n", "decoder.blocks.4.cross_attn.query.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.4.encoder_attn.out_proj.weight -> decoder.blocks.4.cross_attn.out.weight\n", "decoder.blocks.4.cross_attn.out.weight 2 (768, 768)\n", "model.decoder.layers.4.encoder_attn.out_proj.bias -> decoder.blocks.4.cross_attn.out.bias\n", "decoder.blocks.4.cross_attn.out.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.4.encoder_attn_layer_norm.weight -> decoder.blocks.4.cross_attn_ln.weight\n", "decoder.blocks.4.cross_attn_ln.weight 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.4.encoder_attn_layer_norm.bias -> decoder.blocks.4.cross_attn_ln.bias\n", "decoder.blocks.4.cross_attn_ln.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.4.fc1.weight -> decoder.blocks.4.mlp.0.weight\n", "decoder.blocks.4.mlp.0.weight 2 (3072, 768)\n", "model.decoder.layers.4.fc1.bias -> decoder.blocks.4.mlp.0.bias\n", "decoder.blocks.4.mlp.0.bias 1 (3072,)\n", " Converting to float32\n", "model.decoder.layers.4.fc2.weight -> decoder.blocks.4.mlp.2.weight\n", "decoder.blocks.4.mlp.2.weight 2 (768, 3072)\n", "model.decoder.layers.4.fc2.bias -> decoder.blocks.4.mlp.2.bias\n", "decoder.blocks.4.mlp.2.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.4.final_layer_norm.weight -> decoder.blocks.4.mlp_ln.weight\n", "decoder.blocks.4.mlp_ln.weight 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.4.final_layer_norm.bias -> decoder.blocks.4.mlp_ln.bias\n", "decoder.blocks.4.mlp_ln.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.5.self_attn.k_proj.weight -> decoder.blocks.5.attn.key.weight\n", "decoder.blocks.5.attn.key.weight 2 (768, 768)\n", "model.decoder.layers.5.self_attn.v_proj.weight -> decoder.blocks.5.attn.value.weight\n", "decoder.blocks.5.attn.value.weight 2 (768, 768)\n", "model.decoder.layers.5.self_attn.v_proj.bias -> decoder.blocks.5.attn.value.bias\n", "decoder.blocks.5.attn.value.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.5.self_attn.q_proj.weight -> decoder.blocks.5.attn.query.weight\n", "decoder.blocks.5.attn.query.weight 2 (768, 768)\n", "model.decoder.layers.5.self_attn.q_proj.bias -> decoder.blocks.5.attn.query.bias\n", "decoder.blocks.5.attn.query.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.5.self_attn.out_proj.weight -> decoder.blocks.5.attn.out.weight\n", "decoder.blocks.5.attn.out.weight 2 (768, 768)\n", "model.decoder.layers.5.self_attn.out_proj.bias -> decoder.blocks.5.attn.out.bias\n", "decoder.blocks.5.attn.out.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.5.self_attn_layer_norm.weight -> decoder.blocks.5.attn_ln.weight\n", "decoder.blocks.5.attn_ln.weight 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.5.self_attn_layer_norm.bias -> decoder.blocks.5.attn_ln.bias\n", "decoder.blocks.5.attn_ln.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.5.encoder_attn.k_proj.weight -> decoder.blocks.5.cross_attn.key.weight\n", "decoder.blocks.5.cross_attn.key.weight 2 (768, 768)\n", "model.decoder.layers.5.encoder_attn.v_proj.weight -> decoder.blocks.5.cross_attn.value.weight\n", "decoder.blocks.5.cross_attn.value.weight 2 (768, 768)\n", "model.decoder.layers.5.encoder_attn.v_proj.bias -> decoder.blocks.5.cross_attn.value.bias\n", "decoder.blocks.5.cross_attn.value.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.5.encoder_attn.q_proj.weight -> 
decoder.blocks.5.cross_attn.query.weight\n", "decoder.blocks.5.cross_attn.query.weight 2 (768, 768)\n", "model.decoder.layers.5.encoder_attn.q_proj.bias -> decoder.blocks.5.cross_attn.query.bias\n", "decoder.blocks.5.cross_attn.query.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.5.encoder_attn.out_proj.weight -> decoder.blocks.5.cross_attn.out.weight\n", "decoder.blocks.5.cross_attn.out.weight 2 (768, 768)\n", "model.decoder.layers.5.encoder_attn.out_proj.bias -> decoder.blocks.5.cross_attn.out.bias\n", "decoder.blocks.5.cross_attn.out.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.5.encoder_attn_layer_norm.weight -> decoder.blocks.5.cross_attn_ln.weight\n", "decoder.blocks.5.cross_attn_ln.weight 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.5.encoder_attn_layer_norm.bias -> decoder.blocks.5.cross_attn_ln.bias\n", "decoder.blocks.5.cross_attn_ln.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.5.fc1.weight -> decoder.blocks.5.mlp.0.weight\n", "decoder.blocks.5.mlp.0.weight 2 (3072, 768)\n", "model.decoder.layers.5.fc1.bias -> decoder.blocks.5.mlp.0.bias\n", "decoder.blocks.5.mlp.0.bias 1 (3072,)\n", " Converting to float32\n", "model.decoder.layers.5.fc2.weight -> decoder.blocks.5.mlp.2.weight\n", "decoder.blocks.5.mlp.2.weight 2 (768, 3072)\n", "model.decoder.layers.5.fc2.bias -> decoder.blocks.5.mlp.2.bias\n", "decoder.blocks.5.mlp.2.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.5.final_layer_norm.weight -> decoder.blocks.5.mlp_ln.weight\n", "decoder.blocks.5.mlp_ln.weight 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.5.final_layer_norm.bias -> decoder.blocks.5.mlp_ln.bias\n", "decoder.blocks.5.mlp_ln.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.6.self_attn.k_proj.weight -> decoder.blocks.6.attn.key.weight\n", "decoder.blocks.6.attn.key.weight 2 (768, 768)\n", "model.decoder.layers.6.self_attn.v_proj.weight -> decoder.blocks.6.attn.value.weight\n", "decoder.blocks.6.attn.value.weight 2 (768, 768)\n", "model.decoder.layers.6.self_attn.v_proj.bias -> decoder.blocks.6.attn.value.bias\n", "decoder.blocks.6.attn.value.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.6.self_attn.q_proj.weight -> decoder.blocks.6.attn.query.weight\n", "decoder.blocks.6.attn.query.weight 2 (768, 768)\n", "model.decoder.layers.6.self_attn.q_proj.bias -> decoder.blocks.6.attn.query.bias\n", "decoder.blocks.6.attn.query.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.6.self_attn.out_proj.weight -> decoder.blocks.6.attn.out.weight\n", "decoder.blocks.6.attn.out.weight 2 (768, 768)\n", "model.decoder.layers.6.self_attn.out_proj.bias -> decoder.blocks.6.attn.out.bias\n", "decoder.blocks.6.attn.out.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.6.self_attn_layer_norm.weight -> decoder.blocks.6.attn_ln.weight\n", "decoder.blocks.6.attn_ln.weight 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.6.self_attn_layer_norm.bias -> decoder.blocks.6.attn_ln.bias\n", "decoder.blocks.6.attn_ln.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.6.encoder_attn.k_proj.weight -> decoder.blocks.6.cross_attn.key.weight\n", "decoder.blocks.6.cross_attn.key.weight 2 (768, 768)\n", "model.decoder.layers.6.encoder_attn.v_proj.weight -> decoder.blocks.6.cross_attn.value.weight\n", "decoder.blocks.6.cross_attn.value.weight 2 (768, 768)\n", "model.decoder.layers.6.encoder_attn.v_proj.bias -> 
decoder.blocks.6.cross_attn.value.bias\n", "decoder.blocks.6.cross_attn.value.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.6.encoder_attn.q_proj.weight -> decoder.blocks.6.cross_attn.query.weight\n", "decoder.blocks.6.cross_attn.query.weight 2 (768, 768)\n", "model.decoder.layers.6.encoder_attn.q_proj.bias -> decoder.blocks.6.cross_attn.query.bias\n", "decoder.blocks.6.cross_attn.query.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.6.encoder_attn.out_proj.weight -> decoder.blocks.6.cross_attn.out.weight\n", "decoder.blocks.6.cross_attn.out.weight 2 (768, 768)\n", "model.decoder.layers.6.encoder_attn.out_proj.bias -> decoder.blocks.6.cross_attn.out.bias\n", "decoder.blocks.6.cross_attn.out.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.6.encoder_attn_layer_norm.weight -> decoder.blocks.6.cross_attn_ln.weight\n", "decoder.blocks.6.cross_attn_ln.weight 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.6.encoder_attn_layer_norm.bias -> decoder.blocks.6.cross_attn_ln.bias\n", "decoder.blocks.6.cross_attn_ln.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.6.fc1.weight -> decoder.blocks.6.mlp.0.weight\n", "decoder.blocks.6.mlp.0.weight 2 (3072, 768)\n", "model.decoder.layers.6.fc1.bias -> decoder.blocks.6.mlp.0.bias\n", "decoder.blocks.6.mlp.0.bias 1 (3072,)\n", " Converting to float32\n", "model.decoder.layers.6.fc2.weight -> decoder.blocks.6.mlp.2.weight\n", "decoder.blocks.6.mlp.2.weight 2 (768, 3072)\n", "model.decoder.layers.6.fc2.bias -> decoder.blocks.6.mlp.2.bias\n", "decoder.blocks.6.mlp.2.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.6.final_layer_norm.weight -> decoder.blocks.6.mlp_ln.weight\n", "decoder.blocks.6.mlp_ln.weight 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.6.final_layer_norm.bias -> decoder.blocks.6.mlp_ln.bias\n", "decoder.blocks.6.mlp_ln.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.7.self_attn.k_proj.weight -> decoder.blocks.7.attn.key.weight\n", "decoder.blocks.7.attn.key.weight 2 (768, 768)\n", "model.decoder.layers.7.self_attn.v_proj.weight -> decoder.blocks.7.attn.value.weight\n", "decoder.blocks.7.attn.value.weight 2 (768, 768)\n", "model.decoder.layers.7.self_attn.v_proj.bias -> decoder.blocks.7.attn.value.bias\n", "decoder.blocks.7.attn.value.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.7.self_attn.q_proj.weight -> decoder.blocks.7.attn.query.weight\n", "decoder.blocks.7.attn.query.weight 2 (768, 768)\n", "model.decoder.layers.7.self_attn.q_proj.bias -> decoder.blocks.7.attn.query.bias\n", "decoder.blocks.7.attn.query.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.7.self_attn.out_proj.weight -> decoder.blocks.7.attn.out.weight\n", "decoder.blocks.7.attn.out.weight 2 (768, 768)\n", "model.decoder.layers.7.self_attn.out_proj.bias -> decoder.blocks.7.attn.out.bias\n", "decoder.blocks.7.attn.out.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.7.self_attn_layer_norm.weight -> decoder.blocks.7.attn_ln.weight\n", "decoder.blocks.7.attn_ln.weight 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.7.self_attn_layer_norm.bias -> decoder.blocks.7.attn_ln.bias\n", "decoder.blocks.7.attn_ln.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.7.encoder_attn.k_proj.weight -> decoder.blocks.7.cross_attn.key.weight\n", "decoder.blocks.7.cross_attn.key.weight 2 (768, 768)\n", "model.decoder.layers.7.encoder_attn.v_proj.weight -> 
decoder.blocks.7.cross_attn.value.weight\n", "decoder.blocks.7.cross_attn.value.weight 2 (768, 768)\n", "model.decoder.layers.7.encoder_attn.v_proj.bias -> decoder.blocks.7.cross_attn.value.bias\n", "decoder.blocks.7.cross_attn.value.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.7.encoder_attn.q_proj.weight -> decoder.blocks.7.cross_attn.query.weight\n", "decoder.blocks.7.cross_attn.query.weight 2 (768, 768)\n", "model.decoder.layers.7.encoder_attn.q_proj.bias -> decoder.blocks.7.cross_attn.query.bias\n", "decoder.blocks.7.cross_attn.query.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.7.encoder_attn.out_proj.weight -> decoder.blocks.7.cross_attn.out.weight\n", "decoder.blocks.7.cross_attn.out.weight 2 (768, 768)\n", "model.decoder.layers.7.encoder_attn.out_proj.bias -> decoder.blocks.7.cross_attn.out.bias\n", "decoder.blocks.7.cross_attn.out.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.7.encoder_attn_layer_norm.weight -> decoder.blocks.7.cross_attn_ln.weight\n", "decoder.blocks.7.cross_attn_ln.weight 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.7.encoder_attn_layer_norm.bias -> decoder.blocks.7.cross_attn_ln.bias\n", "decoder.blocks.7.cross_attn_ln.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.7.fc1.weight -> decoder.blocks.7.mlp.0.weight\n", "decoder.blocks.7.mlp.0.weight 2 (3072, 768)\n", "model.decoder.layers.7.fc1.bias -> decoder.blocks.7.mlp.0.bias\n", "decoder.blocks.7.mlp.0.bias 1 (3072,)\n", " Converting to float32\n", "model.decoder.layers.7.fc2.weight -> decoder.blocks.7.mlp.2.weight\n", "decoder.blocks.7.mlp.2.weight 2 (768, 3072)\n", "model.decoder.layers.7.fc2.bias -> decoder.blocks.7.mlp.2.bias\n", "decoder.blocks.7.mlp.2.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.7.final_layer_norm.weight -> decoder.blocks.7.mlp_ln.weight\n", "decoder.blocks.7.mlp_ln.weight 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.7.final_layer_norm.bias -> decoder.blocks.7.mlp_ln.bias\n", "decoder.blocks.7.mlp_ln.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.8.self_attn.k_proj.weight -> decoder.blocks.8.attn.key.weight\n", "decoder.blocks.8.attn.key.weight 2 (768, 768)\n", "model.decoder.layers.8.self_attn.v_proj.weight -> decoder.blocks.8.attn.value.weight\n", "decoder.blocks.8.attn.value.weight 2 (768, 768)\n", "model.decoder.layers.8.self_attn.v_proj.bias -> decoder.blocks.8.attn.value.bias\n", "decoder.blocks.8.attn.value.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.8.self_attn.q_proj.weight -> decoder.blocks.8.attn.query.weight\n", "decoder.blocks.8.attn.query.weight 2 (768, 768)\n", "model.decoder.layers.8.self_attn.q_proj.bias -> decoder.blocks.8.attn.query.bias\n", "decoder.blocks.8.attn.query.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.8.self_attn.out_proj.weight -> decoder.blocks.8.attn.out.weight\n", "decoder.blocks.8.attn.out.weight 2 (768, 768)\n", "model.decoder.layers.8.self_attn.out_proj.bias -> decoder.blocks.8.attn.out.bias\n", "decoder.blocks.8.attn.out.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.8.self_attn_layer_norm.weight -> decoder.blocks.8.attn_ln.weight\n", "decoder.blocks.8.attn_ln.weight 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.8.self_attn_layer_norm.bias -> decoder.blocks.8.attn_ln.bias\n", "decoder.blocks.8.attn_ln.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.8.encoder_attn.k_proj.weight -> 
decoder.blocks.8.cross_attn.key.weight\n", "decoder.blocks.8.cross_attn.key.weight 2 (768, 768)\n", "model.decoder.layers.8.encoder_attn.v_proj.weight -> decoder.blocks.8.cross_attn.value.weight\n", "decoder.blocks.8.cross_attn.value.weight 2 (768, 768)\n", "model.decoder.layers.8.encoder_attn.v_proj.bias -> decoder.blocks.8.cross_attn.value.bias\n", "decoder.blocks.8.cross_attn.value.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.8.encoder_attn.q_proj.weight -> decoder.blocks.8.cross_attn.query.weight\n", "decoder.blocks.8.cross_attn.query.weight 2 (768, 768)\n", "model.decoder.layers.8.encoder_attn.q_proj.bias -> decoder.blocks.8.cross_attn.query.bias\n", "decoder.blocks.8.cross_attn.query.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.8.encoder_attn.out_proj.weight -> decoder.blocks.8.cross_attn.out.weight\n", "decoder.blocks.8.cross_attn.out.weight 2 (768, 768)\n", "model.decoder.layers.8.encoder_attn.out_proj.bias -> decoder.blocks.8.cross_attn.out.bias\n", "decoder.blocks.8.cross_attn.out.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.8.encoder_attn_layer_norm.weight -> decoder.blocks.8.cross_attn_ln.weight\n", "decoder.blocks.8.cross_attn_ln.weight 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.8.encoder_attn_layer_norm.bias -> decoder.blocks.8.cross_attn_ln.bias\n", "decoder.blocks.8.cross_attn_ln.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.8.fc1.weight -> decoder.blocks.8.mlp.0.weight\n", "decoder.blocks.8.mlp.0.weight 2 (3072, 768)\n", "model.decoder.layers.8.fc1.bias -> decoder.blocks.8.mlp.0.bias\n", "decoder.blocks.8.mlp.0.bias 1 (3072,)\n", " Converting to float32\n", "model.decoder.layers.8.fc2.weight -> decoder.blocks.8.mlp.2.weight\n", "decoder.blocks.8.mlp.2.weight 2 (768, 3072)\n", "model.decoder.layers.8.fc2.bias -> decoder.blocks.8.mlp.2.bias\n", "decoder.blocks.8.mlp.2.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.8.final_layer_norm.weight -> decoder.blocks.8.mlp_ln.weight\n", "decoder.blocks.8.mlp_ln.weight 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.8.final_layer_norm.bias -> decoder.blocks.8.mlp_ln.bias\n", "decoder.blocks.8.mlp_ln.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.9.self_attn.k_proj.weight -> decoder.blocks.9.attn.key.weight\n", "decoder.blocks.9.attn.key.weight 2 (768, 768)\n", "model.decoder.layers.9.self_attn.v_proj.weight -> decoder.blocks.9.attn.value.weight\n", "decoder.blocks.9.attn.value.weight 2 (768, 768)\n", "model.decoder.layers.9.self_attn.v_proj.bias -> decoder.blocks.9.attn.value.bias\n", "decoder.blocks.9.attn.value.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.9.self_attn.q_proj.weight -> decoder.blocks.9.attn.query.weight\n", "decoder.blocks.9.attn.query.weight 2 (768, 768)\n", "model.decoder.layers.9.self_attn.q_proj.bias -> decoder.blocks.9.attn.query.bias\n", "decoder.blocks.9.attn.query.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.9.self_attn.out_proj.weight -> decoder.blocks.9.attn.out.weight\n", "decoder.blocks.9.attn.out.weight 2 (768, 768)\n", "model.decoder.layers.9.self_attn.out_proj.bias -> decoder.blocks.9.attn.out.bias\n", "decoder.blocks.9.attn.out.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.9.self_attn_layer_norm.weight -> decoder.blocks.9.attn_ln.weight\n", "decoder.blocks.9.attn_ln.weight 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.9.self_attn_layer_norm.bias -> 
decoder.blocks.9.attn_ln.bias\n", "decoder.blocks.9.attn_ln.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.9.encoder_attn.k_proj.weight -> decoder.blocks.9.cross_attn.key.weight\n", "decoder.blocks.9.cross_attn.key.weight 2 (768, 768)\n", "model.decoder.layers.9.encoder_attn.v_proj.weight -> decoder.blocks.9.cross_attn.value.weight\n", "decoder.blocks.9.cross_attn.value.weight 2 (768, 768)\n", "model.decoder.layers.9.encoder_attn.v_proj.bias -> decoder.blocks.9.cross_attn.value.bias\n", "decoder.blocks.9.cross_attn.value.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.9.encoder_attn.q_proj.weight -> decoder.blocks.9.cross_attn.query.weight\n", "decoder.blocks.9.cross_attn.query.weight 2 (768, 768)\n", "model.decoder.layers.9.encoder_attn.q_proj.bias -> decoder.blocks.9.cross_attn.query.bias\n", "decoder.blocks.9.cross_attn.query.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.9.encoder_attn.out_proj.weight -> decoder.blocks.9.cross_attn.out.weight\n", "decoder.blocks.9.cross_attn.out.weight 2 (768, 768)\n", "model.decoder.layers.9.encoder_attn.out_proj.bias -> decoder.blocks.9.cross_attn.out.bias\n", "decoder.blocks.9.cross_attn.out.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.9.encoder_attn_layer_norm.weight -> decoder.blocks.9.cross_attn_ln.weight\n", "decoder.blocks.9.cross_attn_ln.weight 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.9.encoder_attn_layer_norm.bias -> decoder.blocks.9.cross_attn_ln.bias\n", "decoder.blocks.9.cross_attn_ln.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.9.fc1.weight -> decoder.blocks.9.mlp.0.weight\n", "decoder.blocks.9.mlp.0.weight 2 (3072, 768)\n", "model.decoder.layers.9.fc1.bias -> decoder.blocks.9.mlp.0.bias\n", "decoder.blocks.9.mlp.0.bias 1 (3072,)\n", " Converting to float32\n", "model.decoder.layers.9.fc2.weight -> decoder.blocks.9.mlp.2.weight\n", "decoder.blocks.9.mlp.2.weight 2 (768, 3072)\n", "model.decoder.layers.9.fc2.bias -> decoder.blocks.9.mlp.2.bias\n", "decoder.blocks.9.mlp.2.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.9.final_layer_norm.weight -> decoder.blocks.9.mlp_ln.weight\n", "decoder.blocks.9.mlp_ln.weight 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.9.final_layer_norm.bias -> decoder.blocks.9.mlp_ln.bias\n", "decoder.blocks.9.mlp_ln.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.10.self_attn.k_proj.weight -> decoder.blocks.10.attn.key.weight\n", "decoder.blocks.10.attn.key.weight 2 (768, 768)\n", "model.decoder.layers.10.self_attn.v_proj.weight -> decoder.blocks.10.attn.value.weight\n", "decoder.blocks.10.attn.value.weight 2 (768, 768)\n", "model.decoder.layers.10.self_attn.v_proj.bias -> decoder.blocks.10.attn.value.bias\n", "decoder.blocks.10.attn.value.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.10.self_attn.q_proj.weight -> decoder.blocks.10.attn.query.weight\n", "decoder.blocks.10.attn.query.weight 2 (768, 768)\n", "model.decoder.layers.10.self_attn.q_proj.bias -> decoder.blocks.10.attn.query.bias\n", "decoder.blocks.10.attn.query.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.10.self_attn.out_proj.weight -> decoder.blocks.10.attn.out.weight\n", "decoder.blocks.10.attn.out.weight 2 (768, 768)\n", "model.decoder.layers.10.self_attn.out_proj.bias -> decoder.blocks.10.attn.out.bias\n", "decoder.blocks.10.attn.out.bias 1 (768,)\n", " Converting to float32\n", 
"model.decoder.layers.10.self_attn_layer_norm.weight -> decoder.blocks.10.attn_ln.weight\n", "decoder.blocks.10.attn_ln.weight 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.10.self_attn_layer_norm.bias -> decoder.blocks.10.attn_ln.bias\n", "decoder.blocks.10.attn_ln.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.10.encoder_attn.k_proj.weight -> decoder.blocks.10.cross_attn.key.weight\n", "decoder.blocks.10.cross_attn.key.weight 2 (768, 768)\n", "model.decoder.layers.10.encoder_attn.v_proj.weight -> decoder.blocks.10.cross_attn.value.weight\n", "decoder.blocks.10.cross_attn.value.weight 2 (768, 768)\n", "model.decoder.layers.10.encoder_attn.v_proj.bias -> decoder.blocks.10.cross_attn.value.bias\n", "decoder.blocks.10.cross_attn.value.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.10.encoder_attn.q_proj.weight -> decoder.blocks.10.cross_attn.query.weight\n", "decoder.blocks.10.cross_attn.query.weight 2 (768, 768)\n", "model.decoder.layers.10.encoder_attn.q_proj.bias -> decoder.blocks.10.cross_attn.query.bias\n", "decoder.blocks.10.cross_attn.query.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.10.encoder_attn.out_proj.weight -> decoder.blocks.10.cross_attn.out.weight\n", "decoder.blocks.10.cross_attn.out.weight 2 (768, 768)\n", "model.decoder.layers.10.encoder_attn.out_proj.bias -> decoder.blocks.10.cross_attn.out.bias\n", "decoder.blocks.10.cross_attn.out.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.10.encoder_attn_layer_norm.weight -> decoder.blocks.10.cross_attn_ln.weight\n", "decoder.blocks.10.cross_attn_ln.weight 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.10.encoder_attn_layer_norm.bias -> decoder.blocks.10.cross_attn_ln.bias\n", "decoder.blocks.10.cross_attn_ln.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.10.fc1.weight -> decoder.blocks.10.mlp.0.weight\n", "decoder.blocks.10.mlp.0.weight 2 (3072, 768)\n", "model.decoder.layers.10.fc1.bias -> decoder.blocks.10.mlp.0.bias\n", "decoder.blocks.10.mlp.0.bias 1 (3072,)\n", " Converting to float32\n", "model.decoder.layers.10.fc2.weight -> decoder.blocks.10.mlp.2.weight\n", "decoder.blocks.10.mlp.2.weight 2 (768, 3072)\n", "model.decoder.layers.10.fc2.bias -> decoder.blocks.10.mlp.2.bias\n", "decoder.blocks.10.mlp.2.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.10.final_layer_norm.weight -> decoder.blocks.10.mlp_ln.weight\n", "decoder.blocks.10.mlp_ln.weight 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.10.final_layer_norm.bias -> decoder.blocks.10.mlp_ln.bias\n", "decoder.blocks.10.mlp_ln.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.11.self_attn.k_proj.weight -> decoder.blocks.11.attn.key.weight\n", "decoder.blocks.11.attn.key.weight 2 (768, 768)\n", "model.decoder.layers.11.self_attn.v_proj.weight -> decoder.blocks.11.attn.value.weight\n", "decoder.blocks.11.attn.value.weight 2 (768, 768)\n", "model.decoder.layers.11.self_attn.v_proj.bias -> decoder.blocks.11.attn.value.bias\n", "decoder.blocks.11.attn.value.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.11.self_attn.q_proj.weight -> decoder.blocks.11.attn.query.weight\n", "decoder.blocks.11.attn.query.weight 2 (768, 768)\n", "model.decoder.layers.11.self_attn.q_proj.bias -> decoder.blocks.11.attn.query.bias\n", "decoder.blocks.11.attn.query.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.11.self_attn.out_proj.weight -> 
decoder.blocks.11.attn.out.weight\n", "decoder.blocks.11.attn.out.weight 2 (768, 768)\n", "model.decoder.layers.11.self_attn.out_proj.bias -> decoder.blocks.11.attn.out.bias\n", "decoder.blocks.11.attn.out.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.11.self_attn_layer_norm.weight -> decoder.blocks.11.attn_ln.weight\n", "decoder.blocks.11.attn_ln.weight 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.11.self_attn_layer_norm.bias -> decoder.blocks.11.attn_ln.bias\n", "decoder.blocks.11.attn_ln.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.11.encoder_attn.k_proj.weight -> decoder.blocks.11.cross_attn.key.weight\n", "decoder.blocks.11.cross_attn.key.weight 2 (768, 768)\n", "model.decoder.layers.11.encoder_attn.v_proj.weight -> decoder.blocks.11.cross_attn.value.weight\n", "decoder.blocks.11.cross_attn.value.weight 2 (768, 768)\n", "model.decoder.layers.11.encoder_attn.v_proj.bias -> decoder.blocks.11.cross_attn.value.bias\n", "decoder.blocks.11.cross_attn.value.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.11.encoder_attn.q_proj.weight -> decoder.blocks.11.cross_attn.query.weight\n", "decoder.blocks.11.cross_attn.query.weight 2 (768, 768)\n", "model.decoder.layers.11.encoder_attn.q_proj.bias -> decoder.blocks.11.cross_attn.query.bias\n", "decoder.blocks.11.cross_attn.query.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.11.encoder_attn.out_proj.weight -> decoder.blocks.11.cross_attn.out.weight\n", "decoder.blocks.11.cross_attn.out.weight 2 (768, 768)\n", "model.decoder.layers.11.encoder_attn.out_proj.bias -> decoder.blocks.11.cross_attn.out.bias\n", "decoder.blocks.11.cross_attn.out.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.11.encoder_attn_layer_norm.weight -> decoder.blocks.11.cross_attn_ln.weight\n", "decoder.blocks.11.cross_attn_ln.weight 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.11.encoder_attn_layer_norm.bias -> decoder.blocks.11.cross_attn_ln.bias\n", "decoder.blocks.11.cross_attn_ln.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.11.fc1.weight -> decoder.blocks.11.mlp.0.weight\n", "decoder.blocks.11.mlp.0.weight 2 (3072, 768)\n", "model.decoder.layers.11.fc1.bias -> decoder.blocks.11.mlp.0.bias\n", "decoder.blocks.11.mlp.0.bias 1 (3072,)\n", " Converting to float32\n", "model.decoder.layers.11.fc2.weight -> decoder.blocks.11.mlp.2.weight\n", "decoder.blocks.11.mlp.2.weight 2 (768, 3072)\n", "model.decoder.layers.11.fc2.bias -> decoder.blocks.11.mlp.2.bias\n", "decoder.blocks.11.mlp.2.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.11.final_layer_norm.weight -> decoder.blocks.11.mlp_ln.weight\n", "decoder.blocks.11.mlp_ln.weight 1 (768,)\n", " Converting to float32\n", "model.decoder.layers.11.final_layer_norm.bias -> decoder.blocks.11.mlp_ln.bias\n", "decoder.blocks.11.mlp_ln.bias 1 (768,)\n", " Converting to float32\n", "model.decoder.layer_norm.weight -> decoder.ln.weight\n", "decoder.ln.weight 1 (768,)\n", " Converting to float32\n", "model.decoder.layer_norm.bias -> decoder.ln.bias\n", "decoder.ln.bias 1 (768,)\n", " Converting to float32\n", "Skipping proj_out.weight\n", "Done. Output file: ./ggml-model.bin\n", "\n" ] } ] } ] }
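{ "cell_type": "markdown", "source": [ "The log above shows the converter's renaming scheme: each Hugging Face tensor name (e.g. `model.decoder.layers.6.encoder_attn.q_proj.weight`) is mapped to its whisper.cpp counterpart (`decoder.blocks.6.cross_attn.query.weight`), and small tensors such as biases and layer norms are converted to float32. The cell below is a minimal sketch of that renaming, reconstructed from the log output alone -- it is *not* the actual `convert-h5-to-ggml.py` logic, which additionally reshapes the conv biases and writes the ggml binary." ], "metadata": {} }, { "cell_type": "code", "source": [ "# Sketch of the HF -> whisper.cpp tensor-name mapping, inferred from the\n", "# conversion log above (illustrative only, not the converter's own table).\n", "# Order matters: the more specific substrings must be replaced first;\n", "# dicts preserve insertion order in Python 3.7+.\n", "NAME_MAP = {\n", "    \"model.\": \"\",\n", "    \"layers.\": \"blocks.\",\n", "    \"self_attn_layer_norm\": \"attn_ln\",\n", "    \"encoder_attn_layer_norm\": \"cross_attn_ln\",\n", "    \"final_layer_norm\": \"mlp_ln\",\n", "    \"self_attn.\": \"attn.\",\n", "    \"encoder_attn.\": \"cross_attn.\",\n", "    \"k_proj\": \"key\",\n", "    \"q_proj\": \"query\",\n", "    \"v_proj\": \"value\",\n", "    \"out_proj\": \"out\",\n", "    \"fc1\": \"mlp.0\",\n", "    \"fc2\": \"mlp.2\",\n", "    \"embed_positions.weight\": \"positional_embedding\",\n", "    \"layer_norm\": \"ln\",\n", "}\n", "\n", "def to_ggml_name(hf_name: str) -> str:\n", "    for old, new in NAME_MAP.items():\n", "        hf_name = hf_name.replace(old, new)\n", "    return hf_name\n", "\n", "# Spot-check against two lines from the log above\n", "assert to_ggml_name(\"model.decoder.layers.11.fc2.bias\") == \"decoder.blocks.11.mlp.2.bias\"\n", "assert to_ggml_name(\"model.decoder.layers.6.encoder_attn.q_proj.weight\") == \"decoder.blocks.6.cross_attn.query.weight\"\n" ], "metadata": {}, "execution_count": null, "outputs": [] },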
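{ "cell_type": "markdown", "source": [ "The converter wrote `./ggml-model.bin` to the current directory (`proj_out.weight` is skipped because it is tied to the token-embedding weights). To try the converted model, rename it to something descriptive, build whisper.cpp, and run its example binary. The commands below are a sketch following the whisper.cpp README; the model filename here is arbitrary, and `samples/jfk.wav` is just the English sample bundled with the repo, so substitute your own audio (and language) as needed." ], "metadata": {} }, { "cell_type": "code", "source": [ "# Give the converted model a descriptive name (the filename is arbitrary)\n", "!mv ./ggml-model.bin ./ggml-whisper-small-eu-v2.bin\n", "\n", "# Build whisper.cpp and run a quick smoke test on the bundled sample;\n", "# for real Basque audio, add \"-l eu\" and point -f at your own 16 kHz WAV\n", "!cd whisper.cpp && make\n", "!cd whisper.cpp && ./main -m ../ggml-whisper-small-eu-v2.bin -f samples/jfk.wav\n" ], "metadata": {}, "execution_count": null, "outputs": [] } ] }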