{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.10.12","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"markdown","source":"Find this notebook on [kaggle](https://www.kaggle.com/code/amankhandelia/convert-mms-alignment-checkpoint-to-jax)","metadata":{}},{"cell_type":"code","source":"!pip install --pre torchaudio==2.1.0.dev20230627+cu118 --index-url https://download.pytorch.org/whl/nightly/cu118\n!pip install transformers==4.31.0\n!pip install loguru","metadata":{"execution":{"iopub.status.busy":"2023-08-14T06:25:59.525326Z","iopub.execute_input":"2023-08-14T06:25:59.525991Z","iopub.status.idle":"2023-08-14T06:30:10.187806Z","shell.execute_reply.started":"2023-08-14T06:25:59.525945Z","shell.execute_reply":"2023-08-14T06:30:10.185818Z"},"trusted":true},"execution_count":1,"outputs":[{"name":"stdout","text":"Looking in indexes: https://download.pytorch.org/whl/nightly/cu118\nCollecting torchaudio==2.1.0.dev20230627+cu118\n Downloading https://download.pytorch.org/whl/nightly/cu118/torchaudio-2.1.0.dev20230627%2Bcu118-cp310-cp310-linux_x86_64.whl (4.1 MB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.1/4.1 MB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hCollecting torch==2.1.0.dev20230627 (from torchaudio==2.1.0.dev20230627+cu118)\n Downloading https://download.pytorch.org/whl/nightly/cu118/torch-2.1.0.dev20230627%2Bcu118-cp310-cp310-linux_x86_64.whl (2316.2 MB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.3/2.3 GB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m0:00:00\u001b[0m \u001b[36m0:00:01\u001b[0m00:03\u001b[0mm\n\u001b[?25hRequirement already satisfied: filelock in /opt/conda/lib/python3.10/site-packages (from torch==2.1.0.dev20230627->torchaudio==2.1.0.dev20230627+cu118) (3.12.2)\nRequirement already satisfied: typing-extensions in /opt/conda/lib/python3.10/site-packages (from torch==2.1.0.dev20230627->torchaudio==2.1.0.dev20230627+cu118) (4.6.3)\nRequirement already satisfied: sympy in /opt/conda/lib/python3.10/site-packages (from torch==2.1.0.dev20230627->torchaudio==2.1.0.dev20230627+cu118) (1.12)\nRequirement already satisfied: networkx in /opt/conda/lib/python3.10/site-packages (from torch==2.1.0.dev20230627->torchaudio==2.1.0.dev20230627+cu118) (3.1)\nRequirement already satisfied: jinja2 in /opt/conda/lib/python3.10/site-packages (from torch==2.1.0.dev20230627->torchaudio==2.1.0.dev20230627+cu118) (3.1.2)\nRequirement already satisfied: fsspec in /opt/conda/lib/python3.10/site-packages (from torch==2.1.0.dev20230627->torchaudio==2.1.0.dev20230627+cu118) (2023.6.0)\nCollecting pytorch-triton==2.1.0+440fd1bf20 (from torch==2.1.0.dev20230627->torchaudio==2.1.0.dev20230627+cu118)\n Downloading https://download.pytorch.org/whl/nightly/pytorch_triton-2.1.0%2B440fd1bf20-cp310-cp310-linux_x86_64.whl (93.1 MB)\n\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m93.1/93.1 MB\u001b[0m \u001b[31m8.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\n\u001b[?25hRequirement already satisfied: MarkupSafe>=2.0 in /opt/conda/lib/python3.10/site-packages (from jinja2->torch==2.1.0.dev20230627->torchaudio==2.1.0.dev20230627+cu118) (2.1.3)\nRequirement already satisfied: mpmath>=0.19 in /opt/conda/lib/python3.10/site-packages (from sympy->torch==2.1.0.dev20230627->torchaudio==2.1.0.dev20230627+cu118) (1.3.0)\nInstalling collected packages: pytorch-triton, torch, torchaudio\n Attempting uninstall: torch\n Found existing installation: torch 2.0.0+cpu\n Uninstalling torch-2.0.0+cpu:\n Successfully uninstalled torch-2.0.0+cpu\n Attempting uninstall: torchaudio\n Found existing installation: torchaudio 2.0.1+cpu\n Uninstalling torchaudio-2.0.1+cpu:\n Successfully uninstalled torchaudio-2.0.1+cpu\n\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\nfastai 2.7.12 requires torch<2.1,>=1.7, but you have torch 2.1.0.dev20230627+cu118 which is incompatible.\ntorchdata 0.6.0 requires torch==2.0.0, but you have torch 2.1.0.dev20230627+cu118 which is incompatible.\ntorchtext 0.15.1+cpu requires torch==2.0.0, but you have torch 2.1.0.dev20230627+cu118 which is incompatible.\ntorchvision 0.15.1+cpu requires torch==2.0.0, but you have torch 2.1.0.dev20230627+cu118 which is incompatible.\u001b[0m\u001b[31m\n\u001b[0mSuccessfully installed pytorch-triton-2.1.0+440fd1bf20 torch-2.1.0.dev20230627+cu118 torchaudio-2.1.0.dev20230627+cu118\n","output_type":"stream"}]},{"cell_type":"code","source":"%%writefile /kaggle/working/wav2vec2_alignment_config.json\n\n{\n \"activation_dropout\": 0.1,\n \"adapter_attn_dim\": null,\n \"adapter_kernel_size\": 3,\n \"adapter_stride\": 2,\n \"add_adapter\": false,\n \"apply_spec_augment\": false,\n \"attention_dropout\": 0.0,\n \"bos_token_id\": 1,\n \"classifier_proj_size\": 256,\n \"codevector_dim\": 256,\n \"contrastive_logits_temperature\": 0.1,\n \"conv_bias\": true,\n \"conv_dim\": [\n 512,\n 512,\n 512,\n 512,\n 512,\n 512,\n 512\n ],\n \"conv_kernel\": [\n 10,\n 3,\n 3,\n 3,\n 3,\n 2,\n 2\n ],\n \"conv_stride\": [\n 5,\n 2,\n 2,\n 2,\n 2,\n 2,\n 2\n ],\n \"ctc_loss_reduction\": \"sum\",\n \"ctc_zero_infinity\": false,\n \"diversity_loss_weight\": 0.1,\n \"do_stable_layer_norm\": true,\n \"eos_token_id\": 2,\n \"feat_extract_activation\": \"gelu\",\n \"feat_extract_norm\": \"layer\",\n \"feat_proj_dropout\": 0.0,\n \"feat_quantizer_dropout\": 0.0,\n \"final_dropout\": 0.1,\n \"hidden_act\": \"gelu\",\n \"hidden_dropout\": 0.0,\n \"hidden_size\": 1024,\n \"initializer_range\": 0.02,\n \"intermediate_size\": 4096,\n \"layer_norm_eps\": 1e-05,\n \"layerdrop\": 0.1,\n \"num_labels\":31,\n \"mask_feature_length\": 10,\n \"mask_feature_min_masks\": 0,\n \"mask_feature_prob\": 0.0,\n \"mask_time_length\": 10,\n \"mask_time_min_masks\": 2,\n \"mask_time_prob\": 0.0,\n \"model_type\": \"wav2vec2\",\n \"num_adapter_layers\": 3,\n \"num_attention_heads\": 16,\n \"num_codevector_groups\": 2,\n \"num_codevectors_per_group\": 320,\n \"num_conv_pos_embedding_groups\": 16,\n \"num_conv_pos_embeddings\": 128,\n \"num_feat_extract_layers\": 7,\n \"num_hidden_layers\": 24,\n \"num_negatives\": 100,\n \"output_hidden_size\": 1024,\n \"pad_token_id\": 0,\n \"proj_codevector_dim\": 256,\n \"tdnn_dilation\": [\n 1,\n 2,\n 3,\n 1,\n 1\n ],\n \"tdnn_dim\": [\n 512,\n 512,\n 512,\n 512,\n 1500\n ],\n \"tdnn_kernel\": [\n 5,\n 3,\n 3,\n 1,\n 1\n ],\n \"transformers_version\": \"4.31.0\",\n \"use_weighted_layer_sum\": false,\n \"vocab_size\": 32,\n \"xvector_output_dim\": 512\n}\n","metadata":{"execution":{"iopub.status.busy":"2023-08-14T06:33:48.085818Z","iopub.execute_input":"2023-08-14T06:33:48.086321Z","iopub.status.idle":"2023-08-14T06:33:48.097226Z","shell.execute_reply.started":"2023-08-14T06:33:48.086282Z","shell.execute_reply":"2023-08-14T06:33:48.095455Z"},"trusted":true},"execution_count":7,"outputs":[{"name":"stdout","text":"Writing /kaggle/working/wav2vec2_alignment_config.json\n","output_type":"stream"}]},{"cell_type":"code","source":"\"\"\"Convert Wav2Vec2 checkpoint.\"\"\"\n\n# ref: https://github.com/huggingface/transformers/blob/main/src/transformers/models/wav2vec2/convert_wav2vec2_original_pytorch_checkpoint_to_pytorch.py\n\nimport os\nimport re\nimport torch\nfrom transformers import Wav2Vec2ForAudioFrameClassification\nfrom transformers import Wav2Vec2Config\n\nfrom torchaudio.models import wav2vec2_model\nfrom loguru import logger\n\n\nMAPPING = {\n \"feature_projection.projection\": \"feature_projection.projection\",\n \"encoder.transformer.pos_conv_embed.conv.weight_g\": \"encoder.pos_conv_embed.conv.parametrizations.weight\",\n \"encoder.transformer.pos_conv_embed.conv.weight_v\": \"encoder.pos_conv_embed.conv.parametrizations.weight\",\n \"encoder.transformer.pos_conv_embed.conv.bias\": \"encoder.pos_conv_embed.conv\",\n \"attention.k_proj\": \"encoder.layers.*.attention.k_proj\",\n \"attention.v_proj\": \"encoder.layers.*.attention.v_proj\",\n \"attention.q_proj\": \"encoder.layers.*.attention.q_proj\",\n \"attention.out_proj\": \"encoder.layers.*.attention.out_proj\",\n \"transformer.layers.*.layer_norm\": \"encoder.layers.*.layer_norm\",\n \"feed_forward.intermediate_dense\": \"encoder.layers.*.feed_forward.intermediate_dense\",\n \"feed_forward.output_dense\": \"encoder.layers.*.feed_forward.output_dense\",\n \"final_layer_norm\": \"encoder.layers.*.final_layer_norm\",\n \"encoder.transformer.layer_norm\": \"encoder.layer_norm\",\n \"aux\": \"classifier\",\n \"adapter_layer\": \"encoder.layers.*.adapter_layer\",\n \"feature_projection.layer_norm\": \"feature_projection.layer_norm\",\n \"quantizer.weight_proj\": \"quantizer.weight_proj\",\n \"quantizer.vars\": \"quantizer.codevectors\",\n \"project_q\": \"project_q\",\n \"final_proj\": \"project_hid\",\n \"w2v_encoder.proj\": \"lm_head\",\n \"mask_emb\": \"masked_spec_embed\",\n \"pooling_layer.linear\": \"projector\",\n \"pooling_layer.projection\": \"classifier\",\n}\nTOP_LEVEL_KEYS = [\n \"lm_head\",\n \"quantizer.weight_proj\",\n \"quantizer.codevectors\",\n \"project_q\",\n \"project_hid\",\n \"projector\",\n \"classifier\",\n]\n\n\ndef check_or_download_model_weights(model_path_name=\"/tmp/ctc_alignment_mling_uroman_model.pt\"):\n # this model has 315,469,471 parameters\n logger.info(\"Downloading model and dictionary...\")\n if os.path.exists(model_path_name):\n logger.info(\"Model path already exists. Skipping downloading....\")\n else:\n torch.hub.download_url_to_file(\n \"https://dl.fbaipublicfiles.com/mms/torchaudio/ctc_alignment_mling_uroman/model.pt\",\n model_path_name,\n )\n assert os.path.exists(model_path_name)\n return torch.load(model_path_name, map_location=\"cpu\")\n\n\ndef read_txt_into_dict(filename):\n result = {}\n with open(filename, \"r\") as file:\n for line_number, line in enumerate(file):\n line = line.strip()\n if line:\n words = line.split()\n key = line_number\n value = words[0]\n result[key] = value\n return result\n\n\ndef set_recursively(key, value, full_name, weight_type, hf_pointer):\n for attribute in key.split(\".\"):\n hf_pointer = getattr(hf_pointer, attribute)\n\n hf_param_name = None\n for param_key in PARAM_MAPPING.keys():\n if full_name.endswith(param_key):\n hf_param_name = PARAM_MAPPING[full_name.split(\".\")[-1]]\n weight_type = \"param\"\n\n if weight_type is not None and weight_type != \"param\":\n hf_shape = getattr(hf_pointer, weight_type).shape\n elif weight_type is not None and weight_type == \"param\":\n shape_pointer = hf_pointer\n for attribute in hf_param_name.split(\".\"):\n shape_pointer = getattr(shape_pointer, attribute)\n hf_shape = shape_pointer.shape\n\n # let's reduce dimension\n value = value[0]\n else:\n hf_shape = hf_pointer.shape\n\n if hf_shape != value.shape:\n raise ValueError(\n f\"Shape of hf {key + '.' + weight_type if weight_type is not None else ''} is {hf_shape}, but should be\"\n f\" {value.shape} for {full_name}\"\n )\n\n if weight_type == \"weight\":\n hf_pointer.weight.data = value\n elif weight_type == \"original0\":\n hf_pointer.original0.data = value\n elif weight_type == \"original1\":\n hf_pointer.original1.data = value\n elif weight_type == \"bias\":\n hf_pointer.bias.data = value\n elif weight_type == \"param\":\n for attribute in hf_param_name.split(\".\"):\n hf_pointer = getattr(hf_pointer, attribute)\n hf_pointer.data = value\n else:\n hf_pointer.data = value\n\n initialized_key = key + \".\" + weight_type if weight_type is not None else \"\"\n logger.info(f\"{initialized_key} was initialized from {full_name}.\")\n\n return initialized_key\n\n\ndef rename_dict(key, value, full_name, weight_type, hf_dict):\n hf_param_name = None\n for param_key in PARAM_MAPPING.keys():\n if full_name.endswith(param_key):\n hf_param_name = PARAM_MAPPING[full_name.split(\".\")[-1]]\n weight_type = \"param\"\n\n if weight_type is not None and weight_type != \"param\":\n full_key = \".\".join([key, weight_type])\n elif weight_type is not None and weight_type == \"param\":\n full_key = \".\".join([key, hf_param_name])\n else:\n full_key = key\n\n hf_dict[full_key] = value if \"lm_head\" in full_key else value[0]\n\n\ndef replace_int_with_asterisk(input_string):\n # Define a regular expression pattern to match integers with dots before and after\n pattern = r\"\\.\\d+\\.\"\n\n # Use re.sub() to replace the matched pattern with '.*.'\n output_string = re.sub(pattern, \".*.\", input_string)\n\n return output_string\n\n\ndef get_layer_id(string: str) -> str:\n # Define a regular expression pattern to match the layer ID\n pattern = r\"encoder\\.transformer\\.layers\\.(\\d+)\\.\"\n\n # Use re.search() to find the layer ID in the string\n match = re.search(pattern, string)\n\n if match:\n # Extract the layer ID from the matched object\n layer_id = match.group(1)\n return layer_id\n else:\n # If no match is found, return None\n return None\n\n\nPARAM_MAPPING = {\n \"W_a\": \"linear_1.weight\",\n \"W_b\": \"linear_2.weight\",\n \"b_a\": \"linear_1.bias\",\n \"b_b\": \"linear_2.bias\",\n \"ln_W\": \"norm.weight\",\n \"ln_b\": \"norm.bias\",\n}\n\n\ndef load_conv_layer(full_name, value, feature_extractor, unused_weights, uninitialized_weights, use_group_norm):\n name = full_name.split(\"conv_layers.\")[-1]\n items = name.split(\".\")\n layer_id = int(items[0])\n layer_type = items[1]\n\n if layer_type == \"conv\":\n if \"bias\" in name:\n if value.shape != feature_extractor.conv_layers[layer_id].conv.bias.data.shape:\n raise ValueError(\n f\"{full_name} has size {value.shape}, but\"\n f\" {feature_extractor.conv_layers[layer_id].conv.bias.data.shape} was found.\"\n )\n feature_extractor.conv_layers[layer_id].conv.bias.data = value\n logger.info(f\"Feat extract conv layer {layer_id} was initialized from {full_name}.\")\n elif \"weight\" in name:\n if value.shape != feature_extractor.conv_layers[layer_id].conv.weight.data.shape:\n raise ValueError(\n f\"{full_name} has size {value.shape}, but\"\n f\" {feature_extractor.conv_layers[layer_id].conv.weight.data.shape} was found.\"\n )\n feature_extractor.conv_layers[layer_id].conv.weight.data = value\n logger.info(f\"Feat extract conv layer {layer_id} was initialized from {full_name}.\")\n elif layer_type == \"layer_norm\":\n if \"bias\" in name:\n if value.shape != feature_extractor.conv_layers[layer_id].layer_norm.bias.data.shape:\n raise ValueError(\n f\"{full_name} has size {value.shape}, but\"\n f\" {feature_extractor.conv_layers[layer_id].layer_norm.bias.data.shape} was found.\"\n )\n feature_extractor.conv_layers[layer_id].layer_norm.bias.data = value\n logger.info(f\"Feat extract layer norm weight of layer {layer_id} was initialized from {full_name}.\")\n elif \"weight\" in name:\n if value.shape != feature_extractor.conv_layers[layer_id].layer_norm.weight.data.shape:\n raise ValueError(\n f\"{full_name} has size {value.shape}, but\"\n f\" {feature_extractor.conv_layers[layer_id].layer_norm.weight.data.shape} was found.\"\n )\n feature_extractor.conv_layers[layer_id].layer_norm.weight.data = value\n logger.info(f\"Feat extract layer norm weight of layer {layer_id} was initialized from {full_name}.\")\n else:\n unused_weights.append(full_name)\n\n\ndef load_wav2vec2_layer(name, value, hf_model=None, hf_dict=None):\n is_used = False\n for key, mapped_key in MAPPING.items():\n mapped_key = \"wav2vec2.\" + mapped_key if mapped_key not in TOP_LEVEL_KEYS else mapped_key\n if key in replace_int_with_asterisk(name) or key.split(\"encoder.\")[-1] == name.split(\".\")[0]:\n is_used = True\n if \"*\" in key:\n layer_index = get_layer_id(name)\n if name:\n mapped_key = mapped_key.replace(\"*\", layer_index)\n else:\n raise Exception(f\"Name {name} matched with key {key}\")\n elif \"*\" in mapped_key:\n layer_index = name.split(key)[0].split(\".\")[-2]\n mapped_key = mapped_key.replace(\"*\", layer_index)\n if \"weight_g\" in name:\n weight_type = \"original0\"\n elif \"weight_v\" in name:\n weight_type = \"original1\"\n elif \"bias\" in name:\n weight_type = \"bias\"\n elif \"weight\" in name:\n # TODO: don't match quantizer.weight_proj\n weight_type = \"weight\"\n else:\n weight_type = None\n\n initialized_key = set_recursively(mapped_key, value, name, weight_type, hf_model)\n return is_used, initialized_key\n return is_used, None\n\n\ndef recursively_load_weights(alignment_model, hf_model):\n unused_weights = []\n uninitialized_weights = set(hf_model.state_dict().keys())\n alignment_dict = alignment_model.state_dict()\n\n feature_extractor = hf_model.wav2vec2.feature_extractor\n\n for name, value in alignment_dict.items():\n is_used = False\n initialized_key = None\n if \"conv_layers\" in name:\n load_conv_layer(\n name,\n value,\n feature_extractor,\n unused_weights,\n uninitialized_weights,\n hf_model.config.feat_extract_norm == \"group\",\n )\n is_used = True\n else:\n is_used, initialized_key = load_wav2vec2_layer(name, value, hf_model, uninitialized_weights)\n if not is_used:\n unused_weights.append(name)\n elif initialized_key:\n uninitialized_weights.remove(initialized_key)\n\n logger.warning(f\"Unused weights: {unused_weights}\")\n\n logger.warning(f\"Unintialized weights: {uninitialized_weights}\")\n\n\n@torch.no_grad()\ndef convert_wav2vec2_alignment_checkpoint(\n pytorch_dump_folder_path: str, config_path: str, save_pretrained: bool = False\n):\n \"\"\"\n Copy/paste/tweak model's weights to transformers design.\n \"\"\"\n\n config = Wav2Vec2Config.from_pretrained(config_path)\n hf_wav2vec = Wav2Vec2ForAudioFrameClassification(config)\n\n model = wav2vec2_model(\n extractor_mode=\"layer_norm\",\n extractor_conv_layer_config=[\n (512, 10, 5),\n (512, 3, 2),\n (512, 3, 2),\n (512, 3, 2),\n (512, 3, 2),\n (512, 2, 2),\n (512, 2, 2),\n ],\n extractor_conv_bias=True,\n encoder_embed_dim=1024,\n encoder_projection_dropout=0.0,\n encoder_pos_conv_kernel=128,\n encoder_pos_conv_groups=16,\n encoder_num_layers=24,\n encoder_num_heads=16,\n encoder_attention_dropout=0.0,\n encoder_ff_interm_features=4096,\n encoder_ff_interm_dropout=0.1,\n encoder_dropout=0.0,\n encoder_layer_norm_first=True,\n encoder_layer_drop=0.1,\n aux_num_out=31,\n )\n state_dict = check_or_download_model_weights()\n model.load_state_dict(state_dict)\n model = model.eval()\n\n recursively_load_weights(model, hf_wav2vec)\n\n if save_pretrained:\n hf_wav2vec.save_pretrained(pytorch_dump_folder_path)\n\n return hf_wav2vec","metadata":{"execution":{"iopub.status.busy":"2023-08-14T06:33:11.991260Z","iopub.execute_input":"2023-08-14T06:33:11.991699Z","iopub.status.idle":"2023-08-14T06:33:12.055023Z","shell.execute_reply.started":"2023-08-14T06:33:11.991665Z","shell.execute_reply":"2023-08-14T06:33:12.053857Z"},"jupyter":{"source_hidden":true},"trusted":true},"execution_count":6,"outputs":[]},{"cell_type":"code","source":"config_path = \"wav2vec2_alignment_config.json\"\npytorch_model_path = \"torch_mms_alignment_model\"\nconvert_wav2vec2_alignment_checkpoint(pytorch_model_path, config_path, save_pretrained = True)","metadata":{"execution":{"iopub.status.busy":"2023-08-14T06:37:53.577630Z","iopub.execute_input":"2023-08-14T06:37:53.578100Z","iopub.status.idle":"2023-08-14T06:38:08.217112Z","shell.execute_reply.started":"2023-08-14T06:37:53.578045Z","shell.execute_reply":"2023-08-14T06:38:08.215686Z"},"collapsed":true,"jupyter":{"outputs_hidden":true},"trusted":true},"execution_count":11,"outputs":[{"name":"stderr","text":"\u001b[32m2023-08-14 06:38:03.585\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mcheck_or_download_model_weights\u001b[0m:\u001b[36m54\u001b[0m - \u001b[1mDownloading model and dictionary...\u001b[0m\n\u001b[32m2023-08-14 06:38:03.587\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mcheck_or_download_model_weights\u001b[0m:\u001b[36m56\u001b[0m - \u001b[1mModel path already exists. Skipping downloading....\u001b[0m\n\u001b[32m2023-08-14 06:38:05.083\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mload_conv_layer\u001b[0m:\u001b[36m221\u001b[0m - \u001b[1mFeat extract layer norm weight of layer 0 was initialized from feature_extractor.conv_layers.0.layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.085\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mload_conv_layer\u001b[0m:\u001b[36m213\u001b[0m - \u001b[1mFeat extract layer norm weight of layer 0 was initialized from feature_extractor.conv_layers.0.layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.087\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mload_conv_layer\u001b[0m:\u001b[36m204\u001b[0m - \u001b[1mFeat extract conv layer 0 was initialized from feature_extractor.conv_layers.0.conv.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.089\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mload_conv_layer\u001b[0m:\u001b[36m196\u001b[0m - \u001b[1mFeat extract conv layer 0 was initialized from feature_extractor.conv_layers.0.conv.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.090\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mload_conv_layer\u001b[0m:\u001b[36m221\u001b[0m - \u001b[1mFeat extract layer norm weight of layer 1 was initialized from feature_extractor.conv_layers.1.layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.092\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mload_conv_layer\u001b[0m:\u001b[36m213\u001b[0m - \u001b[1mFeat extract layer norm weight of layer 1 was initialized from feature_extractor.conv_layers.1.layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.095\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mload_conv_layer\u001b[0m:\u001b[36m204\u001b[0m - \u001b[1mFeat extract conv layer 1 was initialized from feature_extractor.conv_layers.1.conv.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.096\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mload_conv_layer\u001b[0m:\u001b[36m196\u001b[0m - \u001b[1mFeat extract conv layer 1 was initialized from feature_extractor.conv_layers.1.conv.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.098\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mload_conv_layer\u001b[0m:\u001b[36m221\u001b[0m - \u001b[1mFeat extract layer norm weight of layer 2 was initialized from feature_extractor.conv_layers.2.layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.100\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mload_conv_layer\u001b[0m:\u001b[36m213\u001b[0m - \u001b[1mFeat extract layer norm weight of layer 2 was initialized from feature_extractor.conv_layers.2.layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.102\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mload_conv_layer\u001b[0m:\u001b[36m204\u001b[0m - \u001b[1mFeat extract conv layer 2 was initialized from feature_extractor.conv_layers.2.conv.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.103\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mload_conv_layer\u001b[0m:\u001b[36m196\u001b[0m - \u001b[1mFeat extract conv layer 2 was initialized from feature_extractor.conv_layers.2.conv.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.106\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mload_conv_layer\u001b[0m:\u001b[36m221\u001b[0m - \u001b[1mFeat extract layer norm weight of layer 3 was initialized from feature_extractor.conv_layers.3.layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.107\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mload_conv_layer\u001b[0m:\u001b[36m213\u001b[0m - \u001b[1mFeat extract layer norm weight of layer 3 was initialized from feature_extractor.conv_layers.3.layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.109\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mload_conv_layer\u001b[0m:\u001b[36m204\u001b[0m - \u001b[1mFeat extract conv layer 3 was initialized from feature_extractor.conv_layers.3.conv.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.110\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mload_conv_layer\u001b[0m:\u001b[36m196\u001b[0m - \u001b[1mFeat extract conv layer 3 was initialized from feature_extractor.conv_layers.3.conv.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.112\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mload_conv_layer\u001b[0m:\u001b[36m221\u001b[0m - \u001b[1mFeat extract layer norm weight of layer 4 was initialized from feature_extractor.conv_layers.4.layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.113\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mload_conv_layer\u001b[0m:\u001b[36m213\u001b[0m - \u001b[1mFeat extract layer norm weight of layer 4 was initialized from feature_extractor.conv_layers.4.layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.114\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mload_conv_layer\u001b[0m:\u001b[36m204\u001b[0m - \u001b[1mFeat extract conv layer 4 was initialized from feature_extractor.conv_layers.4.conv.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.115\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mload_conv_layer\u001b[0m:\u001b[36m196\u001b[0m - \u001b[1mFeat extract conv layer 4 was initialized from feature_extractor.conv_layers.4.conv.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.116\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mload_conv_layer\u001b[0m:\u001b[36m221\u001b[0m - \u001b[1mFeat extract layer norm weight of layer 5 was initialized from feature_extractor.conv_layers.5.layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.118\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mload_conv_layer\u001b[0m:\u001b[36m213\u001b[0m - \u001b[1mFeat extract layer norm weight of layer 5 was initialized from feature_extractor.conv_layers.5.layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.119\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mload_conv_layer\u001b[0m:\u001b[36m204\u001b[0m - \u001b[1mFeat extract conv layer 5 was initialized from feature_extractor.conv_layers.5.conv.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.120\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mload_conv_layer\u001b[0m:\u001b[36m196\u001b[0m - \u001b[1mFeat extract conv layer 5 was initialized from feature_extractor.conv_layers.5.conv.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.121\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mload_conv_layer\u001b[0m:\u001b[36m221\u001b[0m - \u001b[1mFeat extract layer norm weight of layer 6 was initialized from feature_extractor.conv_layers.6.layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.125\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mload_conv_layer\u001b[0m:\u001b[36m213\u001b[0m - \u001b[1mFeat extract layer norm weight of layer 6 was initialized from feature_extractor.conv_layers.6.layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.126\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mload_conv_layer\u001b[0m:\u001b[36m204\u001b[0m - \u001b[1mFeat extract conv layer 6 was initialized from feature_extractor.conv_layers.6.conv.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.127\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mload_conv_layer\u001b[0m:\u001b[36m196\u001b[0m - \u001b[1mFeat extract conv layer 6 was initialized from feature_extractor.conv_layers.6.conv.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.129\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.feature_projection.layer_norm.weight was initialized from encoder.feature_projection.layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.131\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.feature_projection.layer_norm.bias was initialized from encoder.feature_projection.layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.132\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.feature_projection.projection.weight was initialized from encoder.feature_projection.projection.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.134\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.feature_projection.projection.bias was initialized from encoder.feature_projection.projection.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.136\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.pos_conv_embed.conv.bias was initialized from encoder.transformer.pos_conv_embed.conv.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.137\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original0 was initialized from encoder.transformer.pos_conv_embed.conv.weight_g.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.140\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.pos_conv_embed.conv.parametrizations.weight.original1 was initialized from encoder.transformer.pos_conv_embed.conv.weight_v.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.146\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layer_norm.weight was initialized from encoder.transformer.layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.148\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layer_norm.bias was initialized from encoder.transformer.layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.150\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.0.attention.k_proj.weight was initialized from encoder.transformer.layers.0.attention.k_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.151\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.0.attention.k_proj.bias was initialized from encoder.transformer.layers.0.attention.k_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.152\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.0.attention.v_proj.weight was initialized from encoder.transformer.layers.0.attention.v_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.153\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.0.attention.v_proj.bias was initialized from encoder.transformer.layers.0.attention.v_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.155\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.0.attention.q_proj.weight was initialized from encoder.transformer.layers.0.attention.q_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.158\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.0.attention.q_proj.bias was initialized from encoder.transformer.layers.0.attention.q_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.159\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.0.attention.out_proj.weight was initialized from encoder.transformer.layers.0.attention.out_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.160\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.0.attention.out_proj.bias was initialized from encoder.transformer.layers.0.attention.out_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.162\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.0.layer_norm.weight was initialized from encoder.transformer.layers.0.layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.163\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.0.layer_norm.bias was initialized from encoder.transformer.layers.0.layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.165\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.0.feed_forward.intermediate_dense.weight was initialized from encoder.transformer.layers.0.feed_forward.intermediate_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.166\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.0.feed_forward.intermediate_dense.bias was initialized from encoder.transformer.layers.0.feed_forward.intermediate_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.167\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.0.feed_forward.output_dense.weight was initialized from encoder.transformer.layers.0.feed_forward.output_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.168\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.0.feed_forward.output_dense.bias was initialized from encoder.transformer.layers.0.feed_forward.output_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.170\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.0.final_layer_norm.weight was initialized from encoder.transformer.layers.0.final_layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.171\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.0.final_layer_norm.bias was initialized from encoder.transformer.layers.0.final_layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.172\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.1.attention.k_proj.weight was initialized from encoder.transformer.layers.1.attention.k_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.173\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.1.attention.k_proj.bias was initialized from encoder.transformer.layers.1.attention.k_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.174\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.1.attention.v_proj.weight was initialized from encoder.transformer.layers.1.attention.v_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.177\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.1.attention.v_proj.bias was initialized from encoder.transformer.layers.1.attention.v_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.179\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.1.attention.q_proj.weight was initialized from encoder.transformer.layers.1.attention.q_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.181\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.1.attention.q_proj.bias was initialized from encoder.transformer.layers.1.attention.q_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.183\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.1.attention.out_proj.weight was initialized from encoder.transformer.layers.1.attention.out_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.184\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.1.attention.out_proj.bias was initialized from encoder.transformer.layers.1.attention.out_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.185\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.1.layer_norm.weight was initialized from encoder.transformer.layers.1.layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.186\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.1.layer_norm.bias was initialized from encoder.transformer.layers.1.layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.187\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.1.feed_forward.intermediate_dense.weight was initialized from encoder.transformer.layers.1.feed_forward.intermediate_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.189\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.1.feed_forward.intermediate_dense.bias was initialized from encoder.transformer.layers.1.feed_forward.intermediate_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.190\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.1.feed_forward.output_dense.weight was initialized from encoder.transformer.layers.1.feed_forward.output_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.191\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.1.feed_forward.output_dense.bias was initialized from encoder.transformer.layers.1.feed_forward.output_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.193\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.1.final_layer_norm.weight was initialized from encoder.transformer.layers.1.final_layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.195\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.1.final_layer_norm.bias was initialized from encoder.transformer.layers.1.final_layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.196\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.2.attention.k_proj.weight was initialized from encoder.transformer.layers.2.attention.k_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.197\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.2.attention.k_proj.bias was initialized from encoder.transformer.layers.2.attention.k_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.199\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.2.attention.v_proj.weight was initialized from encoder.transformer.layers.2.attention.v_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.202\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.2.attention.v_proj.bias was initialized from encoder.transformer.layers.2.attention.v_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.204\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.2.attention.q_proj.weight was initialized from encoder.transformer.layers.2.attention.q_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.205\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.2.attention.q_proj.bias was initialized from encoder.transformer.layers.2.attention.q_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.206\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.2.attention.out_proj.weight was initialized from encoder.transformer.layers.2.attention.out_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.208\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.2.attention.out_proj.bias was initialized from encoder.transformer.layers.2.attention.out_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.209\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.2.layer_norm.weight was initialized from encoder.transformer.layers.2.layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.211\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.2.layer_norm.bias was initialized from encoder.transformer.layers.2.layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.212\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.2.feed_forward.intermediate_dense.weight was initialized from encoder.transformer.layers.2.feed_forward.intermediate_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.213\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.2.feed_forward.intermediate_dense.bias was initialized from encoder.transformer.layers.2.feed_forward.intermediate_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.214\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.2.feed_forward.output_dense.weight was initialized from encoder.transformer.layers.2.feed_forward.output_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.216\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.2.feed_forward.output_dense.bias was initialized from encoder.transformer.layers.2.feed_forward.output_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.218\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.2.final_layer_norm.weight was initialized from encoder.transformer.layers.2.final_layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.219\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.2.final_layer_norm.bias was initialized from encoder.transformer.layers.2.final_layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.221\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.3.attention.k_proj.weight was initialized from encoder.transformer.layers.3.attention.k_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.223\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.3.attention.k_proj.bias was initialized from encoder.transformer.layers.3.attention.k_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.225\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.3.attention.v_proj.weight was initialized from encoder.transformer.layers.3.attention.v_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.226\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.3.attention.v_proj.bias was initialized from encoder.transformer.layers.3.attention.v_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.228\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.3.attention.q_proj.weight was initialized from encoder.transformer.layers.3.attention.q_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.229\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.3.attention.q_proj.bias was initialized from encoder.transformer.layers.3.attention.q_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.231\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.3.attention.out_proj.weight was initialized from encoder.transformer.layers.3.attention.out_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.232\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.3.attention.out_proj.bias was initialized from encoder.transformer.layers.3.attention.out_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.234\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.3.layer_norm.weight was initialized from encoder.transformer.layers.3.layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.235\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.3.layer_norm.bias was initialized from encoder.transformer.layers.3.layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.236\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.3.feed_forward.intermediate_dense.weight was initialized from encoder.transformer.layers.3.feed_forward.intermediate_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.241\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.3.feed_forward.intermediate_dense.bias was initialized from encoder.transformer.layers.3.feed_forward.intermediate_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.242\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.3.feed_forward.output_dense.weight was initialized from encoder.transformer.layers.3.feed_forward.output_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.244\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.3.feed_forward.output_dense.bias was initialized from encoder.transformer.layers.3.feed_forward.output_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.246\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.3.final_layer_norm.weight was initialized from encoder.transformer.layers.3.final_layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.247\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.3.final_layer_norm.bias was initialized from encoder.transformer.layers.3.final_layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.248\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.4.attention.k_proj.weight was initialized from encoder.transformer.layers.4.attention.k_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.249\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.4.attention.k_proj.bias was initialized from encoder.transformer.layers.4.attention.k_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.250\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.4.attention.v_proj.weight was initialized from encoder.transformer.layers.4.attention.v_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.252\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.4.attention.v_proj.bias was initialized from encoder.transformer.layers.4.attention.v_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.253\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.4.attention.q_proj.weight was initialized from encoder.transformer.layers.4.attention.q_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.254\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.4.attention.q_proj.bias was initialized from encoder.transformer.layers.4.attention.q_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.255\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.4.attention.out_proj.weight was initialized from encoder.transformer.layers.4.attention.out_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.256\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.4.attention.out_proj.bias was initialized from encoder.transformer.layers.4.attention.out_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.257\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.4.layer_norm.weight was initialized from encoder.transformer.layers.4.layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.259\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.4.layer_norm.bias was initialized from encoder.transformer.layers.4.layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.260\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.4.feed_forward.intermediate_dense.weight was initialized from encoder.transformer.layers.4.feed_forward.intermediate_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.261\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.4.feed_forward.intermediate_dense.bias was initialized from encoder.transformer.layers.4.feed_forward.intermediate_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.262\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.4.feed_forward.output_dense.weight was initialized from encoder.transformer.layers.4.feed_forward.output_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.267\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.4.feed_forward.output_dense.bias was initialized from encoder.transformer.layers.4.feed_forward.output_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.269\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.4.final_layer_norm.weight was initialized from encoder.transformer.layers.4.final_layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.271\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.4.final_layer_norm.bias was initialized from encoder.transformer.layers.4.final_layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.273\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.5.attention.k_proj.weight was initialized from encoder.transformer.layers.5.attention.k_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.275\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.5.attention.k_proj.bias was initialized from encoder.transformer.layers.5.attention.k_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.277\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.5.attention.v_proj.weight was initialized from encoder.transformer.layers.5.attention.v_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.278\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.5.attention.v_proj.bias was initialized from encoder.transformer.layers.5.attention.v_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.280\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.5.attention.q_proj.weight was initialized from encoder.transformer.layers.5.attention.q_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.281\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.5.attention.q_proj.bias was initialized from encoder.transformer.layers.5.attention.q_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.283\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.5.attention.out_proj.weight was initialized from encoder.transformer.layers.5.attention.out_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.284\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.5.attention.out_proj.bias was initialized from encoder.transformer.layers.5.attention.out_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.286\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.5.layer_norm.weight was initialized from encoder.transformer.layers.5.layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.287\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.5.layer_norm.bias was initialized from encoder.transformer.layers.5.layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.289\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.5.feed_forward.intermediate_dense.weight was initialized from encoder.transformer.layers.5.feed_forward.intermediate_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.292\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.5.feed_forward.intermediate_dense.bias was initialized from encoder.transformer.layers.5.feed_forward.intermediate_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.293\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.5.feed_forward.output_dense.weight was initialized from encoder.transformer.layers.5.feed_forward.output_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.295\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.5.feed_forward.output_dense.bias was initialized from encoder.transformer.layers.5.feed_forward.output_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.296\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.5.final_layer_norm.weight was initialized from encoder.transformer.layers.5.final_layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.298\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.5.final_layer_norm.bias was initialized from encoder.transformer.layers.5.final_layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.299\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.6.attention.k_proj.weight was initialized from encoder.transformer.layers.6.attention.k_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.300\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.6.attention.k_proj.bias was initialized from encoder.transformer.layers.6.attention.k_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.301\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.6.attention.v_proj.weight was initialized from encoder.transformer.layers.6.attention.v_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.305\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.6.attention.v_proj.bias was initialized from encoder.transformer.layers.6.attention.v_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.306\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.6.attention.q_proj.weight was initialized from encoder.transformer.layers.6.attention.q_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.308\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.6.attention.q_proj.bias was initialized from encoder.transformer.layers.6.attention.q_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.309\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.6.attention.out_proj.weight was initialized from encoder.transformer.layers.6.attention.out_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.311\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.6.attention.out_proj.bias was initialized from encoder.transformer.layers.6.attention.out_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.312\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.6.layer_norm.weight was initialized from encoder.transformer.layers.6.layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.314\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.6.layer_norm.bias was initialized from encoder.transformer.layers.6.layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.316\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.6.feed_forward.intermediate_dense.weight was initialized from encoder.transformer.layers.6.feed_forward.intermediate_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.317\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.6.feed_forward.intermediate_dense.bias was initialized from encoder.transformer.layers.6.feed_forward.intermediate_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.319\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.6.feed_forward.output_dense.weight was initialized from encoder.transformer.layers.6.feed_forward.output_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.320\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.6.feed_forward.output_dense.bias was initialized from encoder.transformer.layers.6.feed_forward.output_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.321\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.6.final_layer_norm.weight was initialized from encoder.transformer.layers.6.final_layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.324\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.6.final_layer_norm.bias was initialized from encoder.transformer.layers.6.final_layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.325\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.7.attention.k_proj.weight was initialized from encoder.transformer.layers.7.attention.k_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.326\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.7.attention.k_proj.bias was initialized from encoder.transformer.layers.7.attention.k_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.327\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.7.attention.v_proj.weight was initialized from encoder.transformer.layers.7.attention.v_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.328\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.7.attention.v_proj.bias was initialized from encoder.transformer.layers.7.attention.v_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.329\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.7.attention.q_proj.weight was initialized from encoder.transformer.layers.7.attention.q_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.330\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.7.attention.q_proj.bias was initialized from encoder.transformer.layers.7.attention.q_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.331\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.7.attention.out_proj.weight was initialized from encoder.transformer.layers.7.attention.out_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.332\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.7.attention.out_proj.bias was initialized from encoder.transformer.layers.7.attention.out_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.334\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.7.layer_norm.weight was initialized from encoder.transformer.layers.7.layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.335\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.7.layer_norm.bias was initialized from encoder.transformer.layers.7.layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.336\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.7.feed_forward.intermediate_dense.weight was initialized from encoder.transformer.layers.7.feed_forward.intermediate_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.337\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.7.feed_forward.intermediate_dense.bias was initialized from encoder.transformer.layers.7.feed_forward.intermediate_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.338\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.7.feed_forward.output_dense.weight was initialized from encoder.transformer.layers.7.feed_forward.output_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.339\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.7.feed_forward.output_dense.bias was initialized from encoder.transformer.layers.7.feed_forward.output_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.340\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.7.final_layer_norm.weight was initialized from encoder.transformer.layers.7.final_layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.341\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.7.final_layer_norm.bias was initialized from encoder.transformer.layers.7.final_layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.342\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.8.attention.k_proj.weight was initialized from encoder.transformer.layers.8.attention.k_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.344\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.8.attention.k_proj.bias was initialized from encoder.transformer.layers.8.attention.k_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.345\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.8.attention.v_proj.weight was initialized from encoder.transformer.layers.8.attention.v_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.346\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.8.attention.v_proj.bias was initialized from encoder.transformer.layers.8.attention.v_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.347\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.8.attention.q_proj.weight was initialized from encoder.transformer.layers.8.attention.q_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.348\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.8.attention.q_proj.bias was initialized from encoder.transformer.layers.8.attention.q_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.349\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.8.attention.out_proj.weight was initialized from encoder.transformer.layers.8.attention.out_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.350\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.8.attention.out_proj.bias was initialized from encoder.transformer.layers.8.attention.out_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.351\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.8.layer_norm.weight was initialized from encoder.transformer.layers.8.layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.352\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.8.layer_norm.bias was initialized from encoder.transformer.layers.8.layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.353\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.8.feed_forward.intermediate_dense.weight was initialized from encoder.transformer.layers.8.feed_forward.intermediate_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.354\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.8.feed_forward.intermediate_dense.bias was initialized from encoder.transformer.layers.8.feed_forward.intermediate_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.355\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.8.feed_forward.output_dense.weight was initialized from encoder.transformer.layers.8.feed_forward.output_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.356\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.8.feed_forward.output_dense.bias was initialized from encoder.transformer.layers.8.feed_forward.output_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.357\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.8.final_layer_norm.weight was initialized from encoder.transformer.layers.8.final_layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.358\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.8.final_layer_norm.bias was initialized from encoder.transformer.layers.8.final_layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.359\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.9.attention.k_proj.weight was initialized from encoder.transformer.layers.9.attention.k_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.361\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.9.attention.k_proj.bias was initialized from encoder.transformer.layers.9.attention.k_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.362\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.9.attention.v_proj.weight was initialized from encoder.transformer.layers.9.attention.v_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.363\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.9.attention.v_proj.bias was initialized from encoder.transformer.layers.9.attention.v_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.364\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.9.attention.q_proj.weight was initialized from encoder.transformer.layers.9.attention.q_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.365\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.9.attention.q_proj.bias was initialized from encoder.transformer.layers.9.attention.q_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.367\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.9.attention.out_proj.weight was initialized from encoder.transformer.layers.9.attention.out_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.368\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.9.attention.out_proj.bias was initialized from encoder.transformer.layers.9.attention.out_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.369\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.9.layer_norm.weight was initialized from encoder.transformer.layers.9.layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.370\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.9.layer_norm.bias was initialized from encoder.transformer.layers.9.layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.371\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.9.feed_forward.intermediate_dense.weight was initialized from encoder.transformer.layers.9.feed_forward.intermediate_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.373\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.9.feed_forward.intermediate_dense.bias was initialized from encoder.transformer.layers.9.feed_forward.intermediate_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.374\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.9.feed_forward.output_dense.weight was initialized from encoder.transformer.layers.9.feed_forward.output_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.375\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.9.feed_forward.output_dense.bias was initialized from encoder.transformer.layers.9.feed_forward.output_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.376\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.9.final_layer_norm.weight was initialized from encoder.transformer.layers.9.final_layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.377\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.9.final_layer_norm.bias was initialized from encoder.transformer.layers.9.final_layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.378\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.10.attention.k_proj.weight was initialized from encoder.transformer.layers.10.attention.k_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.379\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.10.attention.k_proj.bias was initialized from encoder.transformer.layers.10.attention.k_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.380\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.10.attention.v_proj.weight was initialized from encoder.transformer.layers.10.attention.v_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.382\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.10.attention.v_proj.bias was initialized from encoder.transformer.layers.10.attention.v_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.383\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.10.attention.q_proj.weight was initialized from encoder.transformer.layers.10.attention.q_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.384\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.10.attention.q_proj.bias was initialized from encoder.transformer.layers.10.attention.q_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.385\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.10.attention.out_proj.weight was initialized from encoder.transformer.layers.10.attention.out_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.387\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.10.attention.out_proj.bias was initialized from encoder.transformer.layers.10.attention.out_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.388\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.10.layer_norm.weight was initialized from encoder.transformer.layers.10.layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.389\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.10.layer_norm.bias was initialized from encoder.transformer.layers.10.layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.390\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.10.feed_forward.intermediate_dense.weight was initialized from encoder.transformer.layers.10.feed_forward.intermediate_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.391\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.10.feed_forward.intermediate_dense.bias was initialized from encoder.transformer.layers.10.feed_forward.intermediate_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.393\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.10.feed_forward.output_dense.weight was initialized from encoder.transformer.layers.10.feed_forward.output_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.394\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.10.feed_forward.output_dense.bias was initialized from encoder.transformer.layers.10.feed_forward.output_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.395\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.10.final_layer_norm.weight was initialized from encoder.transformer.layers.10.final_layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.396\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.10.final_layer_norm.bias was initialized from encoder.transformer.layers.10.final_layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.397\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.11.attention.k_proj.weight was initialized from encoder.transformer.layers.11.attention.k_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.398\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.11.attention.k_proj.bias was initialized from encoder.transformer.layers.11.attention.k_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.403\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.11.attention.v_proj.weight was initialized from encoder.transformer.layers.11.attention.v_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.404\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.11.attention.v_proj.bias was initialized from encoder.transformer.layers.11.attention.v_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.405\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.11.attention.q_proj.weight was initialized from encoder.transformer.layers.11.attention.q_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.406\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.11.attention.q_proj.bias was initialized from encoder.transformer.layers.11.attention.q_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.408\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.11.attention.out_proj.weight was initialized from encoder.transformer.layers.11.attention.out_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.409\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.11.attention.out_proj.bias was initialized from encoder.transformer.layers.11.attention.out_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.410\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.11.layer_norm.weight was initialized from encoder.transformer.layers.11.layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.411\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.11.layer_norm.bias was initialized from encoder.transformer.layers.11.layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.413\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.11.feed_forward.intermediate_dense.weight was initialized from encoder.transformer.layers.11.feed_forward.intermediate_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.414\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.11.feed_forward.intermediate_dense.bias was initialized from encoder.transformer.layers.11.feed_forward.intermediate_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.427\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.11.feed_forward.output_dense.weight was initialized from encoder.transformer.layers.11.feed_forward.output_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.428\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.11.feed_forward.output_dense.bias was initialized from encoder.transformer.layers.11.feed_forward.output_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.430\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.11.final_layer_norm.weight was initialized from encoder.transformer.layers.11.final_layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.431\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.11.final_layer_norm.bias was initialized from encoder.transformer.layers.11.final_layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.432\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.12.attention.k_proj.weight was initialized from encoder.transformer.layers.12.attention.k_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.435\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.12.attention.k_proj.bias was initialized from encoder.transformer.layers.12.attention.k_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.437\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.12.attention.v_proj.weight was initialized from encoder.transformer.layers.12.attention.v_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.438\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.12.attention.v_proj.bias was initialized from encoder.transformer.layers.12.attention.v_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.439\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.12.attention.q_proj.weight was initialized from encoder.transformer.layers.12.attention.q_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.440\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.12.attention.q_proj.bias was initialized from encoder.transformer.layers.12.attention.q_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.442\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.12.attention.out_proj.weight was initialized from encoder.transformer.layers.12.attention.out_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.443\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.12.attention.out_proj.bias was initialized from encoder.transformer.layers.12.attention.out_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.444\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.12.layer_norm.weight was initialized from encoder.transformer.layers.12.layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.445\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.12.layer_norm.bias was initialized from encoder.transformer.layers.12.layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.446\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.12.feed_forward.intermediate_dense.weight was initialized from encoder.transformer.layers.12.feed_forward.intermediate_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.447\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.12.feed_forward.intermediate_dense.bias was initialized from encoder.transformer.layers.12.feed_forward.intermediate_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.449\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.12.feed_forward.output_dense.weight was initialized from encoder.transformer.layers.12.feed_forward.output_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.450\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.12.feed_forward.output_dense.bias was initialized from encoder.transformer.layers.12.feed_forward.output_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.12.final_layer_norm.weight was initialized from encoder.transformer.layers.12.final_layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.452\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.12.final_layer_norm.bias was initialized from encoder.transformer.layers.12.final_layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.453\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.13.attention.k_proj.weight was initialized from encoder.transformer.layers.13.attention.k_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.458\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.13.attention.k_proj.bias was initialized from encoder.transformer.layers.13.attention.k_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.459\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.13.attention.v_proj.weight was initialized from encoder.transformer.layers.13.attention.v_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.461\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.13.attention.v_proj.bias was initialized from encoder.transformer.layers.13.attention.v_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.463\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.13.attention.q_proj.weight was initialized from encoder.transformer.layers.13.attention.q_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.464\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.13.attention.q_proj.bias was initialized from encoder.transformer.layers.13.attention.q_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.465\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.13.attention.out_proj.weight was initialized from encoder.transformer.layers.13.attention.out_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.466\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.13.attention.out_proj.bias was initialized from encoder.transformer.layers.13.attention.out_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.467\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.13.layer_norm.weight was initialized from encoder.transformer.layers.13.layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.469\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.13.layer_norm.bias was initialized from encoder.transformer.layers.13.layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.470\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.13.feed_forward.intermediate_dense.weight was initialized from encoder.transformer.layers.13.feed_forward.intermediate_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.471\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.13.feed_forward.intermediate_dense.bias was initialized from encoder.transformer.layers.13.feed_forward.intermediate_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.472\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.13.feed_forward.output_dense.weight was initialized from encoder.transformer.layers.13.feed_forward.output_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.473\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.13.feed_forward.output_dense.bias was initialized from encoder.transformer.layers.13.feed_forward.output_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.474\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.13.final_layer_norm.weight was initialized from encoder.transformer.layers.13.final_layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.475\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.13.final_layer_norm.bias was initialized from encoder.transformer.layers.13.final_layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.476\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.14.attention.k_proj.weight was initialized from encoder.transformer.layers.14.attention.k_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.477\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.14.attention.k_proj.bias was initialized from encoder.transformer.layers.14.attention.k_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.478\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.14.attention.v_proj.weight was initialized from encoder.transformer.layers.14.attention.v_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.479\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.14.attention.v_proj.bias was initialized from encoder.transformer.layers.14.attention.v_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.481\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.14.attention.q_proj.weight was initialized from encoder.transformer.layers.14.attention.q_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.482\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.14.attention.q_proj.bias was initialized from encoder.transformer.layers.14.attention.q_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.483\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.14.attention.out_proj.weight was initialized from encoder.transformer.layers.14.attention.out_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.484\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.14.attention.out_proj.bias was initialized from encoder.transformer.layers.14.attention.out_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.14.layer_norm.weight was initialized from encoder.transformer.layers.14.layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.486\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.14.layer_norm.bias was initialized from encoder.transformer.layers.14.layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.487\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.14.feed_forward.intermediate_dense.weight was initialized from encoder.transformer.layers.14.feed_forward.intermediate_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.488\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.14.feed_forward.intermediate_dense.bias was initialized from encoder.transformer.layers.14.feed_forward.intermediate_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.489\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.14.feed_forward.output_dense.weight was initialized from encoder.transformer.layers.14.feed_forward.output_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.490\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.14.feed_forward.output_dense.bias was initialized from encoder.transformer.layers.14.feed_forward.output_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.491\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.14.final_layer_norm.weight was initialized from encoder.transformer.layers.14.final_layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.493\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.14.final_layer_norm.bias was initialized from encoder.transformer.layers.14.final_layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.494\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.15.attention.k_proj.weight was initialized from encoder.transformer.layers.15.attention.k_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.495\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.15.attention.k_proj.bias was initialized from encoder.transformer.layers.15.attention.k_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.496\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.15.attention.v_proj.weight was initialized from encoder.transformer.layers.15.attention.v_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.498\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.15.attention.v_proj.bias was initialized from encoder.transformer.layers.15.attention.v_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.499\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.15.attention.q_proj.weight was initialized from encoder.transformer.layers.15.attention.q_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.500\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.15.attention.q_proj.bias was initialized from encoder.transformer.layers.15.attention.q_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.501\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.15.attention.out_proj.weight was initialized from encoder.transformer.layers.15.attention.out_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.502\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.15.attention.out_proj.bias was initialized from encoder.transformer.layers.15.attention.out_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.503\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.15.layer_norm.weight was initialized from encoder.transformer.layers.15.layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.504\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.15.layer_norm.bias was initialized from encoder.transformer.layers.15.layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.505\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.15.feed_forward.intermediate_dense.weight was initialized from encoder.transformer.layers.15.feed_forward.intermediate_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.507\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.15.feed_forward.intermediate_dense.bias was initialized from encoder.transformer.layers.15.feed_forward.intermediate_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.508\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.15.feed_forward.output_dense.weight was initialized from encoder.transformer.layers.15.feed_forward.output_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.509\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.15.feed_forward.output_dense.bias was initialized from encoder.transformer.layers.15.feed_forward.output_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.510\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.15.final_layer_norm.weight was initialized from encoder.transformer.layers.15.final_layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.511\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.15.final_layer_norm.bias was initialized from encoder.transformer.layers.15.final_layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.512\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.16.attention.k_proj.weight was initialized from encoder.transformer.layers.16.attention.k_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.514\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.16.attention.k_proj.bias was initialized from encoder.transformer.layers.16.attention.k_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.515\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.16.attention.v_proj.weight was initialized from encoder.transformer.layers.16.attention.v_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.516\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.16.attention.v_proj.bias was initialized from encoder.transformer.layers.16.attention.v_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.517\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.16.attention.q_proj.weight was initialized from encoder.transformer.layers.16.attention.q_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.518\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.16.attention.q_proj.bias was initialized from encoder.transformer.layers.16.attention.q_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.519\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.16.attention.out_proj.weight was initialized from encoder.transformer.layers.16.attention.out_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.520\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.16.attention.out_proj.bias was initialized from encoder.transformer.layers.16.attention.out_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.521\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.16.layer_norm.weight was initialized from encoder.transformer.layers.16.layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.523\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.16.layer_norm.bias was initialized from encoder.transformer.layers.16.layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.533\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.16.feed_forward.intermediate_dense.weight was initialized from encoder.transformer.layers.16.feed_forward.intermediate_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.535\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.16.feed_forward.intermediate_dense.bias was initialized from encoder.transformer.layers.16.feed_forward.intermediate_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.536\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.16.feed_forward.output_dense.weight was initialized from encoder.transformer.layers.16.feed_forward.output_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.537\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.16.feed_forward.output_dense.bias was initialized from encoder.transformer.layers.16.feed_forward.output_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.538\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.16.final_layer_norm.weight was initialized from encoder.transformer.layers.16.final_layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.540\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.16.final_layer_norm.bias was initialized from encoder.transformer.layers.16.final_layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.541\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.17.attention.k_proj.weight was initialized from encoder.transformer.layers.17.attention.k_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.542\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.17.attention.k_proj.bias was initialized from encoder.transformer.layers.17.attention.k_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.543\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.17.attention.v_proj.weight was initialized from encoder.transformer.layers.17.attention.v_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.544\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.17.attention.v_proj.bias was initialized from encoder.transformer.layers.17.attention.v_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.545\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.17.attention.q_proj.weight was initialized from encoder.transformer.layers.17.attention.q_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.546\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.17.attention.q_proj.bias was initialized from encoder.transformer.layers.17.attention.q_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.547\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.17.attention.out_proj.weight was initialized from encoder.transformer.layers.17.attention.out_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.548\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.17.attention.out_proj.bias was initialized from encoder.transformer.layers.17.attention.out_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.552\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.17.layer_norm.weight was initialized from encoder.transformer.layers.17.layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.554\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.17.layer_norm.bias was initialized from encoder.transformer.layers.17.layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.555\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.17.feed_forward.intermediate_dense.weight was initialized from encoder.transformer.layers.17.feed_forward.intermediate_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.556\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.17.feed_forward.intermediate_dense.bias was initialized from encoder.transformer.layers.17.feed_forward.intermediate_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.558\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.17.feed_forward.output_dense.weight was initialized from encoder.transformer.layers.17.feed_forward.output_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.559\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.17.feed_forward.output_dense.bias was initialized from encoder.transformer.layers.17.feed_forward.output_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.560\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.17.final_layer_norm.weight was initialized from encoder.transformer.layers.17.final_layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.561\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.17.final_layer_norm.bias was initialized from encoder.transformer.layers.17.final_layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.562\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.18.attention.k_proj.weight was initialized from encoder.transformer.layers.18.attention.k_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.566\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.18.attention.k_proj.bias was initialized from encoder.transformer.layers.18.attention.k_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.567\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.18.attention.v_proj.weight was initialized from encoder.transformer.layers.18.attention.v_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.571\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.18.attention.v_proj.bias was initialized from encoder.transformer.layers.18.attention.v_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.572\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.18.attention.q_proj.weight was initialized from encoder.transformer.layers.18.attention.q_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.575\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.18.attention.q_proj.bias was initialized from encoder.transformer.layers.18.attention.q_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.576\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.18.attention.out_proj.weight was initialized from encoder.transformer.layers.18.attention.out_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.578\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.18.attention.out_proj.bias was initialized from encoder.transformer.layers.18.attention.out_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.579\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.18.layer_norm.weight was initialized from encoder.transformer.layers.18.layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.580\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.18.layer_norm.bias was initialized from encoder.transformer.layers.18.layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.582\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.18.feed_forward.intermediate_dense.weight was initialized from encoder.transformer.layers.18.feed_forward.intermediate_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.583\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.18.feed_forward.intermediate_dense.bias was initialized from encoder.transformer.layers.18.feed_forward.intermediate_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.584\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.18.feed_forward.output_dense.weight was initialized from encoder.transformer.layers.18.feed_forward.output_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.585\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.18.feed_forward.output_dense.bias was initialized from encoder.transformer.layers.18.feed_forward.output_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.587\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.18.final_layer_norm.weight was initialized from encoder.transformer.layers.18.final_layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.588\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.18.final_layer_norm.bias was initialized from encoder.transformer.layers.18.final_layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.589\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.19.attention.k_proj.weight was initialized from encoder.transformer.layers.19.attention.k_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.591\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.19.attention.k_proj.bias was initialized from encoder.transformer.layers.19.attention.k_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.592\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.19.attention.v_proj.weight was initialized from encoder.transformer.layers.19.attention.v_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.593\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.19.attention.v_proj.bias was initialized from encoder.transformer.layers.19.attention.v_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.594\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.19.attention.q_proj.weight was initialized from encoder.transformer.layers.19.attention.q_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.597\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.19.attention.q_proj.bias was initialized from encoder.transformer.layers.19.attention.q_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.598\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.19.attention.out_proj.weight was initialized from encoder.transformer.layers.19.attention.out_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.600\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.19.attention.out_proj.bias was initialized from encoder.transformer.layers.19.attention.out_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.601\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.19.layer_norm.weight was initialized from encoder.transformer.layers.19.layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.602\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.19.layer_norm.bias was initialized from encoder.transformer.layers.19.layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.604\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.19.feed_forward.intermediate_dense.weight was initialized from encoder.transformer.layers.19.feed_forward.intermediate_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.605\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.19.feed_forward.intermediate_dense.bias was initialized from encoder.transformer.layers.19.feed_forward.intermediate_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.606\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.19.feed_forward.output_dense.weight was initialized from encoder.transformer.layers.19.feed_forward.output_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.607\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.19.feed_forward.output_dense.bias was initialized from encoder.transformer.layers.19.feed_forward.output_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.608\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.19.final_layer_norm.weight was initialized from encoder.transformer.layers.19.final_layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.610\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.19.final_layer_norm.bias was initialized from encoder.transformer.layers.19.final_layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.611\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.20.attention.k_proj.weight was initialized from encoder.transformer.layers.20.attention.k_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.612\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.20.attention.k_proj.bias was initialized from encoder.transformer.layers.20.attention.k_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.613\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.20.attention.v_proj.weight was initialized from encoder.transformer.layers.20.attention.v_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.615\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.20.attention.v_proj.bias was initialized from encoder.transformer.layers.20.attention.v_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.623\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.20.attention.q_proj.weight was initialized from encoder.transformer.layers.20.attention.q_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.624\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.20.attention.q_proj.bias was initialized from encoder.transformer.layers.20.attention.q_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.626\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.20.attention.out_proj.weight was initialized from encoder.transformer.layers.20.attention.out_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.627\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.20.attention.out_proj.bias was initialized from encoder.transformer.layers.20.attention.out_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.629\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.20.layer_norm.weight was initialized from encoder.transformer.layers.20.layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.630\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.20.layer_norm.bias was initialized from encoder.transformer.layers.20.layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.631\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.20.feed_forward.intermediate_dense.weight was initialized from encoder.transformer.layers.20.feed_forward.intermediate_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.632\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.20.feed_forward.intermediate_dense.bias was initialized from encoder.transformer.layers.20.feed_forward.intermediate_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.635\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.20.feed_forward.output_dense.weight was initialized from encoder.transformer.layers.20.feed_forward.output_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.636\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.20.feed_forward.output_dense.bias was initialized from encoder.transformer.layers.20.feed_forward.output_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.637\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.20.final_layer_norm.weight was initialized from encoder.transformer.layers.20.final_layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.639\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.20.final_layer_norm.bias was initialized from encoder.transformer.layers.20.final_layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.640\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.21.attention.k_proj.weight was initialized from encoder.transformer.layers.21.attention.k_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.641\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.21.attention.k_proj.bias was initialized from encoder.transformer.layers.21.attention.k_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.642\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.21.attention.v_proj.weight was initialized from encoder.transformer.layers.21.attention.v_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.643\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.21.attention.v_proj.bias was initialized from encoder.transformer.layers.21.attention.v_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.645\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.21.attention.q_proj.weight was initialized from encoder.transformer.layers.21.attention.q_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.646\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.21.attention.q_proj.bias was initialized from encoder.transformer.layers.21.attention.q_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.647\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.21.attention.out_proj.weight was initialized from encoder.transformer.layers.21.attention.out_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.648\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.21.attention.out_proj.bias was initialized from encoder.transformer.layers.21.attention.out_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.649\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.21.layer_norm.weight was initialized from encoder.transformer.layers.21.layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.650\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.21.layer_norm.bias was initialized from encoder.transformer.layers.21.layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.655\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.21.feed_forward.intermediate_dense.weight was initialized from encoder.transformer.layers.21.feed_forward.intermediate_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.657\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.21.feed_forward.intermediate_dense.bias was initialized from encoder.transformer.layers.21.feed_forward.intermediate_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.658\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.21.feed_forward.output_dense.weight was initialized from encoder.transformer.layers.21.feed_forward.output_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.660\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.21.feed_forward.output_dense.bias was initialized from encoder.transformer.layers.21.feed_forward.output_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.661\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.21.final_layer_norm.weight was initialized from encoder.transformer.layers.21.final_layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.662\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.21.final_layer_norm.bias was initialized from encoder.transformer.layers.21.final_layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.663\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.22.attention.k_proj.weight was initialized from encoder.transformer.layers.22.attention.k_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.664\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.22.attention.k_proj.bias was initialized from encoder.transformer.layers.22.attention.k_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.665\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.22.attention.v_proj.weight was initialized from encoder.transformer.layers.22.attention.v_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.667\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.22.attention.v_proj.bias was initialized from encoder.transformer.layers.22.attention.v_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.668\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.22.attention.q_proj.weight was initialized from encoder.transformer.layers.22.attention.q_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.669\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.22.attention.q_proj.bias was initialized from encoder.transformer.layers.22.attention.q_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.670\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.22.attention.out_proj.weight was initialized from encoder.transformer.layers.22.attention.out_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.671\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.22.attention.out_proj.bias was initialized from encoder.transformer.layers.22.attention.out_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.672\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.22.layer_norm.weight was initialized from encoder.transformer.layers.22.layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.673\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.22.layer_norm.bias was initialized from encoder.transformer.layers.22.layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.674\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.22.feed_forward.intermediate_dense.weight was initialized from encoder.transformer.layers.22.feed_forward.intermediate_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.675\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.22.feed_forward.intermediate_dense.bias was initialized from encoder.transformer.layers.22.feed_forward.intermediate_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.676\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.22.feed_forward.output_dense.weight was initialized from encoder.transformer.layers.22.feed_forward.output_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.677\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.22.feed_forward.output_dense.bias was initialized from encoder.transformer.layers.22.feed_forward.output_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.678\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.22.final_layer_norm.weight was initialized from encoder.transformer.layers.22.final_layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.679\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.22.final_layer_norm.bias was initialized from encoder.transformer.layers.22.final_layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.680\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.23.attention.k_proj.weight was initialized from encoder.transformer.layers.23.attention.k_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.687\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.23.attention.k_proj.bias was initialized from encoder.transformer.layers.23.attention.k_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.688\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.23.attention.v_proj.weight was initialized from encoder.transformer.layers.23.attention.v_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.689\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.23.attention.v_proj.bias was initialized from encoder.transformer.layers.23.attention.v_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.690\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.23.attention.q_proj.weight was initialized from encoder.transformer.layers.23.attention.q_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.692\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.23.attention.q_proj.bias was initialized from encoder.transformer.layers.23.attention.q_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.693\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.23.attention.out_proj.weight was initialized from encoder.transformer.layers.23.attention.out_proj.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.694\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.23.attention.out_proj.bias was initialized from encoder.transformer.layers.23.attention.out_proj.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.695\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.23.layer_norm.weight was initialized from encoder.transformer.layers.23.layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.696\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.23.layer_norm.bias was initialized from encoder.transformer.layers.23.layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.697\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.23.feed_forward.intermediate_dense.weight was initialized from encoder.transformer.layers.23.feed_forward.intermediate_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.698\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.23.feed_forward.intermediate_dense.bias was initialized from encoder.transformer.layers.23.feed_forward.intermediate_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.700\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.23.feed_forward.output_dense.weight was initialized from encoder.transformer.layers.23.feed_forward.output_dense.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.701\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.23.feed_forward.output_dense.bias was initialized from encoder.transformer.layers.23.feed_forward.output_dense.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.702\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.23.final_layer_norm.weight was initialized from encoder.transformer.layers.23.final_layer_norm.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.704\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mwav2vec2.encoder.layers.23.final_layer_norm.bias was initialized from encoder.transformer.layers.23.final_layer_norm.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.705\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mclassifier.weight was initialized from aux.weight.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.706\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mset_recursively\u001b[0m:\u001b[36m124\u001b[0m - \u001b[1mclassifier.bias was initialized from aux.bias.\u001b[0m\n\u001b[32m2023-08-14 06:38:05.707\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mrecursively_load_weights\u001b[0m:\u001b[36m285\u001b[0m - \u001b[33m\u001b[1mUnused weights: []\u001b[0m\n\u001b[32m2023-08-14 06:38:05.708\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36mrecursively_load_weights\u001b[0m:\u001b[36m287\u001b[0m - \u001b[33m\u001b[1mUnintialized weights: {'wav2vec2.feature_extractor.conv_layers.1.layer_norm.bias', 'wav2vec2.feature_extractor.conv_layers.3.layer_norm.weight', 'wav2vec2.feature_extractor.conv_layers.4.conv.bias', 'wav2vec2.feature_extractor.conv_layers.2.layer_norm.weight', 'wav2vec2.feature_extractor.conv_layers.6.conv.weight', 'wav2vec2.feature_extractor.conv_layers.5.layer_norm.weight', 'wav2vec2.feature_extractor.conv_layers.5.conv.weight', 'wav2vec2.feature_extractor.conv_layers.0.conv.bias', 'wav2vec2.feature_extractor.conv_layers.1.conv.weight', 'wav2vec2.feature_extractor.conv_layers.1.conv.bias', 'wav2vec2.feature_extractor.conv_layers.4.conv.weight', 'wav2vec2.feature_extractor.conv_layers.6.layer_norm.weight', 'wav2vec2.feature_extractor.conv_layers.2.conv.weight', 'wav2vec2.feature_extractor.conv_layers.2.layer_norm.bias', 'wav2vec2.feature_extractor.conv_layers.4.layer_norm.weight', 'wav2vec2.feature_extractor.conv_layers.3.layer_norm.bias', 'wav2vec2.feature_extractor.conv_layers.5.conv.bias', 'wav2vec2.feature_extractor.conv_layers.4.layer_norm.bias', 'wav2vec2.feature_extractor.conv_layers.1.layer_norm.weight', 'wav2vec2.feature_extractor.conv_layers.6.layer_norm.bias', 'wav2vec2.feature_extractor.conv_layers.6.conv.bias', 'wav2vec2.feature_extractor.conv_layers.3.conv.weight', 'wav2vec2.feature_extractor.conv_layers.2.conv.bias', 'wav2vec2.feature_extractor.conv_layers.0.layer_norm.bias', 'wav2vec2.feature_extractor.conv_layers.0.layer_norm.weight', 'wav2vec2.feature_extractor.conv_layers.0.conv.weight', 'wav2vec2.feature_extractor.conv_layers.3.conv.bias', 'wav2vec2.feature_extractor.conv_layers.5.layer_norm.bias'}\u001b[0m\n","output_type":"stream"},{"execution_count":11,"output_type":"execute_result","data":{"text/plain":"Wav2Vec2ForAudioFrameClassification(\n (wav2vec2): Wav2Vec2Model(\n (feature_extractor): Wav2Vec2FeatureEncoder(\n (conv_layers): ModuleList(\n (0): Wav2Vec2LayerNormConvLayer(\n (conv): Conv1d(1, 512, kernel_size=(10,), stride=(5,))\n (layer_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n (activation): GELUActivation()\n )\n (1-4): 4 x Wav2Vec2LayerNormConvLayer(\n (conv): Conv1d(512, 512, kernel_size=(3,), stride=(2,))\n (layer_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n (activation): GELUActivation()\n )\n (5-6): 2 x Wav2Vec2LayerNormConvLayer(\n (conv): Conv1d(512, 512, kernel_size=(2,), stride=(2,))\n (layer_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n (activation): GELUActivation()\n )\n )\n )\n (feature_projection): Wav2Vec2FeatureProjection(\n (layer_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)\n (projection): Linear(in_features=512, out_features=1024, bias=True)\n (dropout): Dropout(p=0.0, inplace=False)\n )\n (encoder): Wav2Vec2EncoderStableLayerNorm(\n (pos_conv_embed): Wav2Vec2PositionalConvEmbedding(\n (conv): ParametrizedConv1d(\n 1024, 1024, kernel_size=(128,), stride=(1,), padding=(64,), groups=16\n (parametrizations): ModuleDict(\n (weight): ParametrizationList(\n (0): _WeightNorm()\n )\n )\n )\n (padding): Wav2Vec2SamePadLayer()\n (activation): GELUActivation()\n )\n (layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)\n (dropout): Dropout(p=0.0, inplace=False)\n (layers): ModuleList(\n (0-23): 24 x Wav2Vec2EncoderLayerStableLayerNorm(\n (attention): Wav2Vec2Attention(\n (k_proj): Linear(in_features=1024, out_features=1024, bias=True)\n (v_proj): Linear(in_features=1024, out_features=1024, bias=True)\n (q_proj): Linear(in_features=1024, out_features=1024, bias=True)\n (out_proj): Linear(in_features=1024, out_features=1024, bias=True)\n )\n (dropout): Dropout(p=0.0, inplace=False)\n (layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)\n (feed_forward): Wav2Vec2FeedForward(\n (intermediate_dropout): Dropout(p=0.1, inplace=False)\n (intermediate_dense): Linear(in_features=1024, out_features=4096, bias=True)\n (intermediate_act_fn): GELUActivation()\n (output_dense): Linear(in_features=4096, out_features=1024, bias=True)\n (output_dropout): Dropout(p=0.0, inplace=False)\n )\n (final_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)\n )\n )\n )\n )\n (classifier): Linear(in_features=1024, out_features=31, bias=True)\n)"},"metadata":{}}]},{"cell_type":"code","source":"from typing import Optional, Union\n\nimport jax\nimport jax.numpy as jnp\nimport flax.linen as nn\n\nfrom transformers.modeling_flax_outputs import FlaxCausalLMOutput\nfrom transformers.models.wav2vec2.configuration_wav2vec2 import Wav2Vec2Config\nfrom transformers.models.wav2vec2.modeling_flax_wav2vec2 import (\n FlaxWav2Vec2FeatureEncoder,\n FlaxWav2Vec2FeatureProjection,\n FlaxWav2Vec2StableLayerNormEncoder,\n FlaxWav2Vec2Adapter,\n FlaxWav2Vec2PreTrainedModel,\n FlaxWav2Vec2BaseModelOutput,\n)\n\n\nclass FlaxWav2Vec2Module(nn.Module):\n config: Wav2Vec2Config\n dtype: jnp.dtype = jnp.float32\n\n def setup(self):\n self.feature_extractor = FlaxWav2Vec2FeatureEncoder(self.config, dtype=self.dtype)\n self.feature_projection = FlaxWav2Vec2FeatureProjection(self.config, dtype=self.dtype)\n if self.config.mask_time_prob > 0.0 or self.config.mask_feature_prob > 0.0:\n self.masked_spec_embed = self.param(\n \"masked_spec_embed\", jax.nn.initializers.uniform(), (self.config.hidden_size,)\n )\n\n if self.config.do_stable_layer_norm:\n self.encoder = FlaxWav2Vec2StableLayerNormEncoder(self.config, dtype=self.dtype)\n else:\n raise NotImplementedError(\"``config.do_stable_layer_norm is False`` is currently not supported.\")\n\n self.adapter = FlaxWav2Vec2Adapter(self.config, dtype=self.dtype) if self.config.add_adapter else None\n\n def __call__(\n self,\n input_values,\n attention_mask=None,\n mask_time_indices=None,\n deterministic=True,\n output_attentions=None,\n output_hidden_states=None,\n freeze_feature_encoder=False,\n return_dict=None,\n ):\n extract_features = self.feature_extractor(input_values, freeze_feature_encoder=freeze_feature_encoder)\n\n # make sure that no loss is computed on padded inputs\n if attention_mask is not None:\n # compute reduced attention_mask corresponding to feature vectors\n attention_mask = self._get_feature_vector_attention_mask(\n extract_features.shape[1], attention_mask, add_adapter=False\n )\n\n hidden_states, extract_features = self.feature_projection(extract_features, deterministic=deterministic)\n if mask_time_indices is not None: # apply SpecAugment along time axis with given indices\n hidden_states = jnp.where(\n jnp.broadcast_to(mask_time_indices[:, :, None], hidden_states.shape),\n jnp.broadcast_to(self.masked_spec_embed[None, None, :], hidden_states.shape),\n hidden_states,\n )\n\n encoder_outputs = self.encoder(\n hidden_states,\n attention_mask=attention_mask,\n deterministic=deterministic,\n output_attentions=output_attentions,\n output_hidden_states=output_hidden_states,\n return_dict=return_dict,\n )\n\n hidden_states = encoder_outputs[0]\n\n if self.adapter is not None:\n hidden_states = self.adapter(hidden_states)\n\n if not return_dict:\n return (hidden_states, extract_features) + encoder_outputs[1:]\n\n return FlaxWav2Vec2BaseModelOutput(\n last_hidden_state=hidden_states,\n extract_features=extract_features,\n hidden_states=encoder_outputs.hidden_states,\n attentions=encoder_outputs.attentions,\n )\n\n def _get_feat_extract_output_lengths(\n self, input_lengths: Union[jnp.ndarray, int], add_adapter: Optional[bool] = None\n ):\n \"\"\"\n Computes the output length of the convolutional layers\n \"\"\"\n\n add_adapter = self.config.add_adapter if add_adapter is None else add_adapter\n\n def _conv_out_length(input_length, kernel_size, stride):\n # 1D convolutional layer output length formula taken\n # from https://pytorch.org/docs/stable/generated/torch.nn.Conv1d.html\n return (input_length - kernel_size) // stride + 1\n\n for kernel_size, stride in zip(self.config.conv_kernel, self.config.conv_stride):\n input_lengths = _conv_out_length(input_lengths, kernel_size, stride)\n\n if add_adapter:\n for _ in range(self.config.num_adapter_layers):\n input_lengths = _conv_out_length(input_lengths, 1, self.config.adapter_stride)\n\n return input_lengths\n\n def _get_feature_vector_attention_mask(\n self, feature_vector_length: int, attention_mask: jnp.ndarray, add_adapter=None\n ):\n # Effectively attention_mask.sum(-1), but not inplace to be able to run\n # on inference mode.\n non_padded_lengths = attention_mask.cumsum(axis=-1)[:, -1]\n\n output_lengths = self._get_feat_extract_output_lengths(non_padded_lengths, add_adapter=add_adapter)\n\n batch_size = attention_mask.shape[0]\n\n attention_mask = jnp.zeros((batch_size, feature_vector_length), dtype=attention_mask.dtype)\n # these two operations makes sure that all values\n # before the output lengths indices are attended to\n attention_mask = attention_mask.at[jnp.arange(attention_mask.shape[0]), output_lengths - 1].set(1)\n attention_mask = jnp.flip(jnp.flip(attention_mask, -1).cumsum(-1), -1).astype(\"bool\")\n return attention_mask\n\n\nclass FlaxWav2Vec2Model(FlaxWav2Vec2PreTrainedModel):\n module_class = FlaxWav2Vec2Module\n\n\nclass FlaxWav2Vec2ForAudioFrameClassificationModule(nn.Module):\n config: Wav2Vec2Config\n dtype: jnp.dtype = jnp.float32\n\n def setup(self):\n self.wav2vec2 = FlaxWav2Vec2Module(self.config, dtype=self.dtype)\n self.classifier = nn.Dense(\n self.config.num_labels,\n kernel_init=jax.nn.initializers.normal(self.config.initializer_range),\n dtype=self.dtype,\n )\n\n def __call__(\n self,\n input_values,\n attention_mask=None,\n mask_time_indices=None,\n deterministic=True,\n output_attentions=None,\n output_hidden_states=None,\n freeze_feature_encoder=False,\n return_dict=None,\n ):\n outputs = self.wav2vec2(\n input_values,\n attention_mask=attention_mask,\n mask_time_indices=mask_time_indices,\n deterministic=deterministic,\n output_attentions=output_attentions,\n output_hidden_states=output_hidden_states,\n freeze_feature_encoder=freeze_feature_encoder,\n return_dict=return_dict,\n )\n\n hidden_states = outputs[0]\n\n logits = self.classifier(hidden_states)\n\n if not return_dict:\n return (logits,) + outputs[2:]\n\n return FlaxCausalLMOutput(logits=logits, hidden_states=outputs.hidden_states, attentions=outputs.attentions)\n\n\nclass FlaxWav2Vec2ForAudioFrameClassification(FlaxWav2Vec2PreTrainedModel):\n module_class = FlaxWav2Vec2ForAudioFrameClassificationModule","metadata":{"execution":{"iopub.status.busy":"2023-08-14T06:41:24.010341Z","iopub.execute_input":"2023-08-14T06:41:24.011031Z","iopub.status.idle":"2023-08-14T06:41:24.277458Z","shell.execute_reply.started":"2023-08-14T06:41:24.010965Z","shell.execute_reply":"2023-08-14T06:41:24.275797Z"},"trusted":true},"execution_count":12,"outputs":[]},{"cell_type":"code","source":"model = FlaxWav2Vec2ForAudioFrameClassification.from_pretrained(\"/kaggle/working/torch_mms_alignment_model\", from_pt=True)","metadata":{"execution":{"iopub.status.busy":"2023-08-14T06:43:25.695981Z","iopub.execute_input":"2023-08-14T06:43:25.696509Z","iopub.status.idle":"2023-08-14T06:43:45.940392Z","shell.execute_reply.started":"2023-08-14T06:43:25.696462Z","shell.execute_reply":"2023-08-14T06:43:45.938162Z"},"trusted":true},"execution_count":14,"outputs":[]},{"cell_type":"code","source":"model.save_pretrained(\"flax_mms_alignment_model\")","metadata":{"execution":{"iopub.status.busy":"2023-08-14T06:56:13.270450Z","iopub.execute_input":"2023-08-14T06:56:13.271794Z","iopub.status.idle":"2023-08-14T06:56:19.317011Z","shell.execute_reply.started":"2023-08-14T06:56:13.271740Z","shell.execute_reply":"2023-08-14T06:56:19.315654Z"},"trusted":true},"execution_count":18,"outputs":[]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]}]}