diff --git "a/chuvash_training_script.ipynb" "b/chuvash_training_script.ipynb"
new file mode 100644
--- /dev/null
+++ "b/chuvash_training_script.ipynb"
@@ -0,0 +1,2229 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# HuggingFace challenge - Debugger notebook\n",
+ "Run this notebook to verify your libraries versions, check GPU config and run a quick training"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "metadata": {
+ "id": "T2utsYSKszvv"
+ },
+ "outputs": [],
+ "source": [
+ "import platform\n",
+ "import multiprocessing\n",
+ "\n",
+ "import torch\n",
+ "import transformers\n",
+ "import datasets\n",
+ "\n",
+ "import soundfile"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Print main infos"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "5P6I-W9ts-kR",
+ "outputId": "939bd550-1486-46a6-8371-e82ada0f448c"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Platform: Linux-5.11.0-37-generic-x86_64-with-glibc2.10\n",
+ "CPU cores: 60\n",
+ "Python version: 3.8.8\n",
+ "PyTorch version: 1.10.1+cu102\n",
+ "GPU is visible: True\n",
+ "Transformers version: 4.16.0.dev0\n",
+ "Datasets version: 1.17.1.dev0\n",
+ "soundfile version: 0.10.3\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(f\"Platform: {platform.platform()}\")\n",
+ "print(f\"CPU cores: {multiprocessing.cpu_count()}\")\n",
+ "\n",
+ "print(f\"Python version: {platform.python_version()}\")\n",
+ "\n",
+ "print(f\"PyTorch version: {torch.__version__}\")\n",
+ "print(f\"GPU is visible: {torch.cuda.is_available()}\")\n",
+ "\n",
+ "print(f\"Transformers version: {transformers.__version__}\")\n",
+ "print(f\"Datasets version: {datasets.__version__}\")\n",
+ "\n",
+ "print(f\"soundfile version: {soundfile.__version__}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Check your GPU informations (if any)\n",
+ "If you launched an AI Training job with GPU resources, they should be listed below (Tesla V100s 32GB).\n",
+ "Driver and CUDA version "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "YT7fRnKctggU",
+ "outputId": "f355a3e0-20da-489f-bd1f-5e508e792a68"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Wed Jan 26 14:03:08 2022 \n",
+ "+-----------------------------------------------------------------------------+\n",
+ "| NVIDIA-SMI 470.57.02 Driver Version: 470.57.02 CUDA Version: 11.4 |\n",
+ "|-------------------------------+----------------------+----------------------+\n",
+ "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n",
+ "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n",
+ "| | | MIG M. |\n",
+ "|===============================+======================+======================|\n",
+ "| 0 Tesla V100S-PCI... Off | 00000000:00:06.0 Off | 0 |\n",
+ "| N/A 40C P0 52W / 250W | 21343MiB / 32510MiB | 0% Default |\n",
+ "| | | N/A |\n",
+ "+-------------------------------+----------------------+----------------------+\n",
+ " \n",
+ "+-----------------------------------------------------------------------------+\n",
+ "| Processes: |\n",
+ "| GPU GI CI PID Type Process name GPU Memory |\n",
+ "| ID ID Usage |\n",
+ "|=============================================================================|\n",
+ "+-----------------------------------------------------------------------------+\n"
+ ]
+ }
+ ],
+ "source": [
+ "!nvidia-smi"
+ ]
+ },
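+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a quick cross-check from Python, the cell below reads the visible device's properties through `torch.cuda`. This is a minimal sketch added for convenience; it assumes only the `torch` import from the first cell."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Minimal sketch: mirror the nvidia-smi summary from Python via torch.cuda.\n",
+ "if torch.cuda.is_available():\n",
+ "    props = torch.cuda.get_device_properties(0)\n",
+ "    print(f\"Device: {props.name}\")\n",
+ "    print(f\"Total memory: {props.total_memory / 1024**3:.1f} GiB\")\n",
+ "    print(f\"CUDA capability: {props.major}.{props.minor}\")\n",
+ "else:\n",
+ "    print(\"No CUDA device visible\")"
+ ]
+ },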
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "2fa897b4afc049229144599af9e3f807",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "VBox(children=(HTML(value='
\\n] 29.64K --.-KB/s in 0.001s \n",
+ "\n",
+ "2022-01-22 15:01:09 (20.1 MB/s) - ‘run_speech_recognition_ctc.py’ saved [30348/30348]\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "!wget -O run_speech_recognition_ctc.py https://raw.githubusercontent.com/huggingface/transformers/master/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# \t--learning_rate=\"7.5e-5\" \\\n",
+ "# 84.5"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "Mz4bubhxxsad",
+ "outputId": "23398525-cc19-43c2-9fec-497e06214f29"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "remove special characters from datasets: 100%|█| 1794/1794 [00:00<00:00, 4500.40\n",
+ "remove special characters from datasets: 100%|█| 810/810 [00:00<00:00, 7132.38ex\n",
+ "loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-300m/resolve/main/config.json from cache at /workspace/.cache/huggingface/transformers/dabc27df63e37bd2a7a221c7774e35f36a280fbdf917cf54cadfc7df8c786f6f.a3e4c3c967d9985881e0ae550a5f6f668f897db5ab2e0802f9b97973b15970e6\n",
+ "Model config Wav2Vec2Config {\n",
+ " \"_name_or_path\": \"facebook/wav2vec2-xls-r-300m\",\n",
+ " \"activation_dropout\": 0.0,\n",
+ " \"adapter_kernel_size\": 3,\n",
+ " \"adapter_stride\": 2,\n",
+ " \"add_adapter\": false,\n",
+ " \"apply_spec_augment\": true,\n",
+ " \"architectures\": [\n",
+ " \"Wav2Vec2ForPreTraining\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.1,\n",
+ " \"bos_token_id\": 1,\n",
+ " \"classifier_proj_size\": 256,\n",
+ " \"codevector_dim\": 768,\n",
+ " \"contrastive_logits_temperature\": 0.1,\n",
+ " \"conv_bias\": true,\n",
+ " \"conv_dim\": [\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 512\n",
+ " ],\n",
+ " \"conv_kernel\": [\n",
+ " 10,\n",
+ " 3,\n",
+ " 3,\n",
+ " 3,\n",
+ " 3,\n",
+ " 2,\n",
+ " 2\n",
+ " ],\n",
+ " \"conv_stride\": [\n",
+ " 5,\n",
+ " 2,\n",
+ " 2,\n",
+ " 2,\n",
+ " 2,\n",
+ " 2,\n",
+ " 2\n",
+ " ],\n",
+ " \"ctc_loss_reduction\": \"sum\",\n",
+ " \"ctc_zero_infinity\": false,\n",
+ " \"diversity_loss_weight\": 0.1,\n",
+ " \"do_stable_layer_norm\": true,\n",
+ " \"eos_token_id\": 2,\n",
+ " \"feat_extract_activation\": \"gelu\",\n",
+ " \"feat_extract_dropout\": 0.0,\n",
+ " \"feat_extract_norm\": \"layer\",\n",
+ " \"feat_proj_dropout\": 0.1,\n",
+ " \"feat_quantizer_dropout\": 0.0,\n",
+ " \"final_dropout\": 0.0,\n",
+ " \"gradient_checkpointing\": false,\n",
+ " \"hidden_act\": \"gelu\",\n",
+ " \"hidden_dropout\": 0.1,\n",
+ " \"hidden_size\": 1024,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4096,\n",
+ " \"layer_norm_eps\": 1e-05,\n",
+ " \"layerdrop\": 0.1,\n",
+ " \"mask_feature_length\": 10,\n",
+ " \"mask_feature_min_masks\": 0,\n",
+ " \"mask_feature_prob\": 0.0,\n",
+ " \"mask_time_length\": 10,\n",
+ " \"mask_time_min_masks\": 2,\n",
+ " \"mask_time_prob\": 0.075,\n",
+ " \"model_type\": \"wav2vec2\",\n",
+ " \"num_adapter_layers\": 3,\n",
+ " \"num_attention_heads\": 16,\n",
+ " \"num_codevector_groups\": 2,\n",
+ " \"num_codevectors_per_group\": 320,\n",
+ " \"num_conv_pos_embedding_groups\": 16,\n",
+ " \"num_conv_pos_embeddings\": 128,\n",
+ " \"num_feat_extract_layers\": 7,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_negatives\": 100,\n",
+ " \"output_hidden_size\": 1024,\n",
+ " \"pad_token_id\": 0,\n",
+ " \"proj_codevector_dim\": 768,\n",
+ " \"tdnn_dilation\": [\n",
+ " 1,\n",
+ " 2,\n",
+ " 3,\n",
+ " 1,\n",
+ " 1\n",
+ " ],\n",
+ " \"tdnn_dim\": [\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 1500\n",
+ " ],\n",
+ " \"tdnn_kernel\": [\n",
+ " 5,\n",
+ " 3,\n",
+ " 3,\n",
+ " 1,\n",
+ " 1\n",
+ " ],\n",
+ " \"torch_dtype\": \"float32\",\n",
+ " \"transformers_version\": \"4.16.0.dev0\",\n",
+ " \"use_weighted_layer_sum\": false,\n",
+ " \"vocab_size\": 32,\n",
+ " \"xvector_output_dim\": 512\n",
+ "}\n",
+ "\n",
+ "100%|█████████████████████████████████████████████| 1/1 [00:00<00:00, 11.99ba/s]\n",
+ "100%|█████████████████████████████████████████████| 1/1 [00:00<00:00, 35.85ba/s]\n",
+ "Didn't find file ./wav2vec2-large-xls-r-300m-chuvash/tokenizer.json. We won't load it.\n",
+ "loading file ./wav2vec2-large-xls-r-300m-chuvash/vocab.json\n",
+ "loading file ./wav2vec2-large-xls-r-300m-chuvash/tokenizer_config.json\n",
+ "loading file ./wav2vec2-large-xls-r-300m-chuvash/added_tokens.json\n",
+ "loading file ./wav2vec2-large-xls-r-300m-chuvash/special_tokens_map.json\n",
+ "loading file None\n",
+ "Adding to the vocabulary\n",
+ "Adding to the vocabulary\n",
+ "loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-300m/resolve/main/config.json from cache at /workspace/.cache/huggingface/transformers/dabc27df63e37bd2a7a221c7774e35f36a280fbdf917cf54cadfc7df8c786f6f.a3e4c3c967d9985881e0ae550a5f6f668f897db5ab2e0802f9b97973b15970e6\n",
+ "Model config Wav2Vec2Config {\n",
+ " \"_name_or_path\": \"facebook/wav2vec2-xls-r-300m\",\n",
+ " \"activation_dropout\": 0.0,\n",
+ " \"adapter_kernel_size\": 3,\n",
+ " \"adapter_stride\": 2,\n",
+ " \"add_adapter\": false,\n",
+ " \"apply_spec_augment\": true,\n",
+ " \"architectures\": [\n",
+ " \"Wav2Vec2ForPreTraining\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.1,\n",
+ " \"bos_token_id\": 1,\n",
+ " \"classifier_proj_size\": 256,\n",
+ " \"codevector_dim\": 768,\n",
+ " \"contrastive_logits_temperature\": 0.1,\n",
+ " \"conv_bias\": true,\n",
+ " \"conv_dim\": [\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 512\n",
+ " ],\n",
+ " \"conv_kernel\": [\n",
+ " 10,\n",
+ " 3,\n",
+ " 3,\n",
+ " 3,\n",
+ " 3,\n",
+ " 2,\n",
+ " 2\n",
+ " ],\n",
+ " \"conv_stride\": [\n",
+ " 5,\n",
+ " 2,\n",
+ " 2,\n",
+ " 2,\n",
+ " 2,\n",
+ " 2,\n",
+ " 2\n",
+ " ],\n",
+ " \"ctc_loss_reduction\": \"sum\",\n",
+ " \"ctc_zero_infinity\": false,\n",
+ " \"diversity_loss_weight\": 0.1,\n",
+ " \"do_stable_layer_norm\": true,\n",
+ " \"eos_token_id\": 2,\n",
+ " \"feat_extract_activation\": \"gelu\",\n",
+ " \"feat_extract_dropout\": 0.0,\n",
+ " \"feat_extract_norm\": \"layer\",\n",
+ " \"feat_proj_dropout\": 0.1,\n",
+ " \"feat_quantizer_dropout\": 0.0,\n",
+ " \"final_dropout\": 0.0,\n",
+ " \"gradient_checkpointing\": false,\n",
+ " \"hidden_act\": \"gelu\",\n",
+ " \"hidden_dropout\": 0.1,\n",
+ " \"hidden_size\": 1024,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4096,\n",
+ " \"layer_norm_eps\": 1e-05,\n",
+ " \"layerdrop\": 0.1,\n",
+ " \"mask_feature_length\": 10,\n",
+ " \"mask_feature_min_masks\": 0,\n",
+ " \"mask_feature_prob\": 0.0,\n",
+ " \"mask_time_length\": 10,\n",
+ " \"mask_time_min_masks\": 2,\n",
+ " \"mask_time_prob\": 0.075,\n",
+ " \"model_type\": \"wav2vec2\",\n",
+ " \"num_adapter_layers\": 3,\n",
+ " \"num_attention_heads\": 16,\n",
+ " \"num_codevector_groups\": 2,\n",
+ " \"num_codevectors_per_group\": 320,\n",
+ " \"num_conv_pos_embedding_groups\": 16,\n",
+ " \"num_conv_pos_embeddings\": 128,\n",
+ " \"num_feat_extract_layers\": 7,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_negatives\": 100,\n",
+ " \"output_hidden_size\": 1024,\n",
+ " \"pad_token_id\": 0,\n",
+ " \"proj_codevector_dim\": 768,\n",
+ " \"tdnn_dilation\": [\n",
+ " 1,\n",
+ " 2,\n",
+ " 3,\n",
+ " 1,\n",
+ " 1\n",
+ " ],\n",
+ " \"tdnn_dim\": [\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 1500\n",
+ " ],\n",
+ " \"tdnn_kernel\": [\n",
+ " 5,\n",
+ " 3,\n",
+ " 3,\n",
+ " 1,\n",
+ " 1\n",
+ " ],\n",
+ " \"torch_dtype\": \"float32\",\n",
+ " \"transformers_version\": \"4.16.0.dev0\",\n",
+ " \"use_weighted_layer_sum\": false,\n",
+ " \"vocab_size\": 32,\n",
+ " \"xvector_output_dim\": 512\n",
+ "}\n",
+ "\n",
+ "loading feature extractor configuration file https://huggingface.co/facebook/wav2vec2-xls-r-300m/resolve/main/preprocessor_config.json from cache at /workspace/.cache/huggingface/transformers/6fb028b95b394059e7d3b367bbca2382b576c66aebe896f04d2cd34e1b575f5b.d4484dc1c81456a2461485e7168b04347a7b9a4e3b1ef3aba723323b33e12326\n",
+ "Feature extractor Wav2Vec2FeatureExtractor {\n",
+ " \"do_normalize\": true,\n",
+ " \"feature_extractor_type\": \"Wav2Vec2FeatureExtractor\",\n",
+ " \"feature_size\": 1,\n",
+ " \"padding_side\": \"right\",\n",
+ " \"padding_value\": 0,\n",
+ " \"return_attention_mask\": true,\n",
+ " \"sampling_rate\": 16000\n",
+ "}\n",
+ "\n",
+ "loading weights file https://huggingface.co/facebook/wav2vec2-xls-r-300m/resolve/main/pytorch_model.bin from cache at /workspace/.cache/huggingface/transformers/1e6a6507f3b689035cd4b247e2a37c154e27f39143f31357a49b4e38baeccc36.1edb32803799e27ed554eb7dd935f6745b1a0b17b0ea256442fe24db6eb546cd\n",
+ "Some weights of the model checkpoint at facebook/wav2vec2-xls-r-300m were not used when initializing Wav2Vec2ForCTC: ['quantizer.weight_proj.weight', 'quantizer.codevectors', 'project_q.bias', 'quantizer.weight_proj.bias', 'project_hid.bias', 'project_q.weight', 'project_hid.weight']\n",
+ "- This IS expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
+ "- This IS NOT expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
+ "Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-300m and are newly initialized: ['lm_head.bias', 'lm_head.weight']\n",
+ "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
+ "preprocess datasets: 100%|█████████████████| 1794/1794 [00:15<00:00, 114.24ex/s]\n",
+ "preprocess datasets: 100%|███████████████████| 810/810 [00:07<00:00, 108.63ex/s]\n",
+ "100%|████████████████████████████████████████████| 2/2 [00:00<00:00, 713.44ba/s]\n",
+ "100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 742.75ba/s]\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-chuvash/preprocessor_config.json\n",
+ "tokenizer config file saved in ./wav2vec2-large-xls-r-300m-chuvash/tokenizer_config.json\n",
+ "Special tokens file saved in ./wav2vec2-large-xls-r-300m-chuvash/special_tokens_map.json\n",
+ "added tokens file saved in ./wav2vec2-large-xls-r-300m-chuvash/added_tokens.json\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-chuvash/config.json\n",
+ "loading feature extractor configuration file ./wav2vec2-large-xls-r-300m-chuvash/preprocessor_config.json\n",
+ "loading configuration file ./wav2vec2-large-xls-r-300m-chuvash/config.json\n",
+ "Model config Wav2Vec2Config {\n",
+ " \"_name_or_path\": \"./wav2vec2-large-xls-r-300m-chuvash\",\n",
+ " \"activation_dropout\": 0.1,\n",
+ " \"adapter_kernel_size\": 3,\n",
+ " \"adapter_stride\": 2,\n",
+ " \"add_adapter\": false,\n",
+ " \"apply_spec_augment\": true,\n",
+ " \"architectures\": [\n",
+ " \"Wav2Vec2ForPreTraining\"\n",
+ " ],\n",
+ " \"attention_dropout\": 0.0,\n",
+ " \"bos_token_id\": 1,\n",
+ " \"classifier_proj_size\": 256,\n",
+ " \"codevector_dim\": 768,\n",
+ " \"contrastive_logits_temperature\": 0.1,\n",
+ " \"conv_bias\": true,\n",
+ " \"conv_dim\": [\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 512\n",
+ " ],\n",
+ " \"conv_kernel\": [\n",
+ " 10,\n",
+ " 3,\n",
+ " 3,\n",
+ " 3,\n",
+ " 3,\n",
+ " 2,\n",
+ " 2\n",
+ " ],\n",
+ " \"conv_stride\": [\n",
+ " 5,\n",
+ " 2,\n",
+ " 2,\n",
+ " 2,\n",
+ " 2,\n",
+ " 2,\n",
+ " 2\n",
+ " ],\n",
+ " \"ctc_loss_reduction\": \"mean\",\n",
+ " \"ctc_zero_infinity\": false,\n",
+ " \"diversity_loss_weight\": 0.1,\n",
+ " \"do_stable_layer_norm\": true,\n",
+ " \"eos_token_id\": 2,\n",
+ " \"feat_extract_activation\": \"gelu\",\n",
+ " \"feat_extract_dropout\": 0.0,\n",
+ " \"feat_extract_norm\": \"layer\",\n",
+ " \"feat_proj_dropout\": 0.0,\n",
+ " \"feat_quantizer_dropout\": 0.0,\n",
+ " \"final_dropout\": 0.0,\n",
+ " \"hidden_act\": \"gelu\",\n",
+ " \"hidden_dropout\": 0.0,\n",
+ " \"hidden_size\": 1024,\n",
+ " \"initializer_range\": 0.02,\n",
+ " \"intermediate_size\": 4096,\n",
+ " \"layer_norm_eps\": 1e-05,\n",
+ " \"layerdrop\": 0.0,\n",
+ " \"mask_feature_length\": 64,\n",
+ " \"mask_feature_min_masks\": 0,\n",
+ " \"mask_feature_prob\": 0.25,\n",
+ " \"mask_time_length\": 10,\n",
+ " \"mask_time_min_masks\": 2,\n",
+ " \"mask_time_prob\": 0.75,\n",
+ " \"model_type\": \"wav2vec2\",\n",
+ " \"num_adapter_layers\": 3,\n",
+ " \"num_attention_heads\": 16,\n",
+ " \"num_codevector_groups\": 2,\n",
+ " \"num_codevectors_per_group\": 320,\n",
+ " \"num_conv_pos_embedding_groups\": 16,\n",
+ " \"num_conv_pos_embeddings\": 128,\n",
+ " \"num_feat_extract_layers\": 7,\n",
+ " \"num_hidden_layers\": 24,\n",
+ " \"num_negatives\": 100,\n",
+ " \"output_hidden_size\": 1024,\n",
+ " \"pad_token_id\": 43,\n",
+ " \"proj_codevector_dim\": 768,\n",
+ " \"tdnn_dilation\": [\n",
+ " 1,\n",
+ " 2,\n",
+ " 3,\n",
+ " 1,\n",
+ " 1\n",
+ " ],\n",
+ " \"tdnn_dim\": [\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 512,\n",
+ " 1500\n",
+ " ],\n",
+ " \"tdnn_kernel\": [\n",
+ " 5,\n",
+ " 3,\n",
+ " 3,\n",
+ " 1,\n",
+ " 1\n",
+ " ],\n",
+ " \"torch_dtype\": \"float32\",\n",
+ " \"transformers_version\": \"4.16.0.dev0\",\n",
+ " \"use_weighted_layer_sum\": false,\n",
+ " \"vocab_size\": 46,\n",
+ " \"xvector_output_dim\": 512\n",
+ "}\n",
+ "\n",
+ "loading feature extractor configuration file ./wav2vec2-large-xls-r-300m-chuvash/preprocessor_config.json\n",
+ "Feature extractor Wav2Vec2FeatureExtractor {\n",
+ " \"do_normalize\": true,\n",
+ " \"feature_extractor_type\": \"Wav2Vec2FeatureExtractor\",\n",
+ " \"feature_size\": 1,\n",
+ " \"padding_side\": \"right\",\n",
+ " \"padding_value\": 0,\n",
+ " \"return_attention_mask\": true,\n",
+ " \"sampling_rate\": 16000\n",
+ "}\n",
+ "\n",
+ "Didn't find file ./wav2vec2-large-xls-r-300m-chuvash/tokenizer.json. We won't load it.\n",
+ "loading file ./wav2vec2-large-xls-r-300m-chuvash/vocab.json\n",
+ "loading file ./wav2vec2-large-xls-r-300m-chuvash/tokenizer_config.json\n",
+ "loading file ./wav2vec2-large-xls-r-300m-chuvash/added_tokens.json\n",
+ "loading file ./wav2vec2-large-xls-r-300m-chuvash/special_tokens_map.json\n",
+ "loading file None\n",
+ "Adding to the vocabulary\n",
+ "Adding to the vocabulary\n",
+ "/workspace/votic_training/./wav2vec2-large-xls-r-300m-chuvash is already a clone of https://huggingface.co/infinitejoy/wav2vec2-large-xls-r-300m-chuvash. Make sure you pull the latest changes with `repo.git_pull()`.\n",
+ "Using amp half precision backend\n",
+ "The following columns in the training set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "/opt/conda/lib/python3.8/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use thePyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
+ " warnings.warn(\n",
+ "***** Running training *****\n",
+ " Num examples = 1794\n",
+ " Num Epochs = 100\n",
+ " Instantaneous batch size per device = 32\n",
+ " Total train batch size (w. parallel, distributed & accumulation) = 32\n",
+ " Gradient Accumulation steps = 1\n",
+ " Total optimization steps = 5700\n",
+ " 9%|███▎ | 500/5700 [12:20<2:24:45, 1.67s/it]The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 810\n",
+ " Batch size = 32\n",
+ "\n",
+ " 0%| | 0/26 [00:00, ?it/s]\u001b[A\n",
+ " 8%|███▍ | 2/26 [00:01<00:16, 1.49it/s]\u001b[A\n",
+ " 12%|█████ | 3/26 [00:02<00:22, 1.03it/s]\u001b[A\n",
+ " 15%|██████▊ | 4/26 [00:04<00:25, 1.14s/it]\u001b[A\n",
+ " 19%|████████▍ | 5/26 [00:05<00:25, 1.21s/it]\u001b[A\n",
+ " 23%|██████████▏ | 6/26 [00:06<00:25, 1.26s/it]\u001b[A\n",
+ " 27%|███████████▊ | 7/26 [00:08<00:24, 1.29s/it]\u001b[A\n",
+ " 31%|█████████████▌ | 8/26 [00:09<00:22, 1.26s/it]\u001b[A\n",
+ " 35%|███████████████▏ | 9/26 [00:10<00:22, 1.31s/it]\u001b[A\n",
+ " 38%|████████████████▌ | 10/26 [00:12<00:20, 1.30s/it]\u001b[A\n",
+ " 42%|██████████████████▏ | 11/26 [00:13<00:19, 1.29s/it]\u001b[A\n",
+ " 46%|███████████████████▊ | 12/26 [00:14<00:18, 1.29s/it]\u001b[A\n",
+ " 50%|█████████████████████▌ | 13/26 [00:15<00:16, 1.30s/it]\u001b[A\n",
+ " 54%|███████████████████████▏ | 14/26 [00:17<00:15, 1.29s/it]\u001b[A\n",
+ " 58%|████████████████████████▊ | 15/26 [00:18<00:14, 1.33s/it]\u001b[A\n",
+ " 62%|██████████████████████████▍ | 16/26 [00:20<00:13, 1.33s/it]\u001b[A\n",
+ " 65%|████████████████████████████ | 17/26 [00:21<00:12, 1.35s/it]\u001b[A\n",
+ " 69%|█████████████████████████████▊ | 18/26 [00:22<00:10, 1.29s/it]\u001b[A\n",
+ " 73%|███████████████████████████████▍ | 19/26 [00:23<00:09, 1.32s/it]\u001b[A\n",
+ " 77%|█████████████████████████████████ | 20/26 [00:25<00:07, 1.33s/it]\u001b[A\n",
+ " 81%|██████████████████████████████████▋ | 21/26 [00:26<00:06, 1.28s/it]\u001b[A\n",
+ " 85%|████████████████████████████████████▍ | 22/26 [00:27<00:05, 1.28s/it]\u001b[A\n",
+ " 88%|██████████████████████████████████████ | 23/26 [00:29<00:03, 1.31s/it]\u001b[A\n",
+ " 92%|███████████████████████████████████████▋ | 24/26 [00:30<00:02, 1.37s/it]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▎ | 25/26 [00:32<00:01, 1.40s/it]\u001b[A\n",
+ " \u001b[A\n",
+ " 9%|███▎ | 500/5700 [12:54<2:24:45, 1.67s/it]\n",
+ "100%|███████████████████████████████████████████| 26/26 [00:32<00:00, 1.04s/it]\u001b[A\n",
+ " \u001b[ASaving model checkpoint to ./wav2vec2-large-xls-r-300m-chuvash/checkpoint-500\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-chuvash/checkpoint-500/config.json\n",
+ "Model weights saved in ./wav2vec2-large-xls-r-300m-chuvash/checkpoint-500/pytorch_model.bin\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-chuvash/checkpoint-500/preprocessor_config.json\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-chuvash/preprocessor_config.json\n",
+ " 18%|██████▍ | 1000/5700 [25:35<2:12:19, 1.69s/it]The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 810\n",
+ " Batch size = 32\n",
+ "\n",
+ " 0%| | 0/26 [00:00, ?it/s]\u001b[A\n",
+ " 8%|███▍ | 2/26 [00:01<00:16, 1.48it/s]\u001b[A\n",
+ " 12%|█████ | 3/26 [00:02<00:22, 1.02it/s]\u001b[A\n",
+ " 15%|██████��� | 4/26 [00:04<00:25, 1.14s/it]\u001b[A\n",
+ " 19%|████████▍ | 5/26 [00:05<00:25, 1.23s/it]\u001b[A\n",
+ " 23%|██████████▏ | 6/26 [00:06<00:25, 1.26s/it]\u001b[A\n",
+ " 27%|███████████▊ | 7/26 [00:08<00:24, 1.29s/it]\u001b[A\n",
+ " 31%|█████████████▌ | 8/26 [00:09<00:22, 1.25s/it]\u001b[A\n",
+ " 35%|███████████████▏ | 9/26 [00:10<00:22, 1.31s/it]\u001b[A\n",
+ " 38%|████████████████▌ | 10/26 [00:12<00:21, 1.31s/it]\u001b[A\n",
+ " 42%|██████████████████▏ | 11/26 [00:13<00:19, 1.30s/it]\u001b[A\n",
+ " 46%|███████████████████▊ | 12/26 [00:14<00:18, 1.29s/it]\u001b[A\n",
+ " 50%|█████████████████████▌ | 13/26 [00:16<00:16, 1.30s/it]\u001b[A\n",
+ " 54%|███████████████████████▏ | 14/26 [00:17<00:15, 1.28s/it]\u001b[A\n",
+ " 58%|████████████████████████▊ | 15/26 [00:18<00:14, 1.33s/it]\u001b[A\n",
+ " 62%|██████████████████████████▍ | 16/26 [00:20<00:13, 1.33s/it]\u001b[A\n",
+ " 65%|████████████████████████████ | 17/26 [00:21<00:12, 1.35s/it]\u001b[A\n",
+ " 69%|█████████████████████████████▊ | 18/26 [00:22<00:10, 1.30s/it]\u001b[A\n",
+ " 73%|███████████████████████████████▍ | 19/26 [00:24<00:09, 1.32s/it]\u001b[A\n",
+ " 77%|█████████████████████████████████ | 20/26 [00:25<00:08, 1.34s/it]\u001b[A\n",
+ " 81%|██████████████████████████████████▋ | 21/26 [00:26<00:06, 1.29s/it]\u001b[A\n",
+ " 85%|████████████████████████████████████▍ | 22/26 [00:27<00:05, 1.29s/it]\u001b[A\n",
+ " 88%|██████████████████████████████████████ | 23/26 [00:29<00:03, 1.32s/it]\u001b[A\n",
+ " 92%|███████████████████████████████████████▋ | 24/26 [00:30<00:02, 1.37s/it]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▎ | 25/26 [00:32<00:01, 1.39s/it]\u001b[A\n",
+ " \u001b[A\n",
+ " 18%|██████▍ | 1000/5700 [26:09<2:12:19, 1.69s/it]\n",
+ "100%|███████████████████████████████████████████| 26/26 [00:32<00:00, 1.03s/it]\u001b[A\n",
+ " \u001b[ASaving model checkpoint to ./wav2vec2-large-xls-r-300m-chuvash/checkpoint-1000\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-chuvash/checkpoint-1000/config.json\n",
+ "Model weights saved in ./wav2vec2-large-xls-r-300m-chuvash/checkpoint-1000/pytorch_model.bin\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-chuvash/checkpoint-1000/preprocessor_config.json\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-chuvash/preprocessor_config.json\n",
+ " 26%|█████████▋ | 1500/5700 [39:54<1:59:12, 1.70s/it]The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 810\n",
+ " Batch size = 32\n",
+ "\n",
+ " 0%| | 0/26 [00:00, ?it/s]\u001b[A\n",
+ " 8%|███▍ | 2/26 [00:01<00:16, 1.43it/s]\u001b[A\n",
+ " 12%|█████ | 3/26 [00:02<00:23, 1.02s/it]\u001b[A\n",
+ " 15%|██████▊ | 4/26 [00:04<00:25, 1.18s/it]\u001b[A\n",
+ " 19%|████████▍ | 5/26 [00:05<00:26, 1.24s/it]\u001b[A\n",
+ " 23%|██████████▏ | 6/26 [00:07<00:25, 1.28s/it]\u001b[A\n",
+ " 27%|███████████▊ | 7/26 [00:08<00:24, 1.31s/it]\u001b[A\n",
+ " 31%|█████████████▌ | 8/26 [00:09<00:22, 1.28s/it]\u001b[A\n",
+ " 35%|███████████████▏ | 9/26 [00:11<00:22, 1.32s/it]\u001b[A\n",
+ " 38%|████████████████▌ | 10/26 [00:12<00:20, 1.30s/it]\u001b[A\n",
+ " 42%|██████████████████▏ | 11/26 [00:13<00:19, 1.30s/it]\u001b[A\n",
+ " 46%|███████████████████▊ | 12/26 [00:14<00:18, 1.30s/it]\u001b[A\n",
+ " 50%|█████████████████████▌ | 13/26 [00:16<00:17, 1.32s/it]\u001b[A\n",
+ " 54%|███████████████████████▏ | 14/26 [00:17<00:15, 1.31s/it]\u001b[A\n",
+ " 58%|████████████████████████▊ | 15/26 [00:18<00:14, 1.35s/it]\u001b[A\n",
+ " 62%|██████████████████████████▍ | 16/26 [00:20<00:13, 1.34s/it]\u001b[A\n",
+ " 65%|████████████████████████████ | 17/26 [00:21<00:12, 1.35s/it]\u001b[A\n",
+ " 69%|█████████████████████████████▊ | 18/26 [00:22<00:10, 1.29s/it]\u001b[A\n",
+ " 73%|███████████████████████████████▍ | 19/26 [00:24<00:09, 1.33s/it]\u001b[A\n",
+ " 77%|█████████████████████████████████ | 20/26 [00:25<00:08, 1.34s/it]\u001b[A\n",
+ " 81%|██████████████████████████████████▋ | 21/26 [00:26<00:06, 1.29s/it]\u001b[A\n",
+ " 85%|████████████████████████████████████▍ | 22/26 [00:28<00:05, 1.29s/it]\u001b[A\n",
+ " 88%|██████████████████████████████████████ | 23/26 [00:29<00:03, 1.32s/it]\u001b[A\n",
+ " 92%|███████████████████████████████████████▋ | 24/26 [00:30<00:02, 1.37s/it]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▎ | 25/26 [00:32<00:01, 1.38s/it]\u001b[A\n",
+ " \u001b[A\n",
+ " 26%|█████████▋ | 1500/5700 [40:28<1:59:12, 1.70s/it]\n",
+ "100%|███████████████████████████████████████████| 26/26 [00:33<00:00, 1.02s/it]\u001b[A\n",
+ " \u001b[ASaving model checkpoint to ./wav2vec2-large-xls-r-300m-chuvash/checkpoint-1500\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-chuvash/checkpoint-1500/config.json\n",
+ "Model weights saved in ./wav2vec2-large-xls-r-300m-chuvash/checkpoint-1500/pytorch_model.bin\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-chuvash/checkpoint-1500/preprocessor_config.json\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-chuvash/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-large-xls-r-300m-chuvash/checkpoint-500] due to args.save_total_limit\n",
+ " 35%|████████████▉ | 2000/5700 [54:08<1:44:34, 1.70s/it]The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 810\n",
+ " Batch size = 32\n",
+ "\n",
+ " 0%| | 0/26 [00:00, ?it/s]\u001b[A\n",
+ " 8%|███▍ | 2/26 [00:01<00:16, 1.49it/s]\u001b[A\n",
+ " 12%|█████ | 3/26 [00:02<00:22, 1.02it/s]\u001b[A\n",
+ " 15%|██████▊ | 4/26 [00:04<00:25, 1.14s/it]\u001b[A\n",
+ " 19%|████████▍ | 5/26 [00:05<00:25, 1.22s/it]\u001b[A\n",
+ " 23%|██████████▏ | 6/26 [00:06<00:25, 1.25s/it]\u001b[A\n",
+ " 27%|███████████▊ | 7/26 [00:08<00:24, 1.27s/it]\u001b[A\n",
+ " 31%|█████████████▌ | 8/26 [00:09<00:22, 1.25s/it]\u001b[A\n",
+ " 35%|███████████████▏ | 9/26 [00:10<00:22, 1.31s/it]\u001b[A\n",
+ " 38%|████████████████▌ | 10/26 [00:12<00:20, 1.29s/it]\u001b[A\n",
+ " 42%|██████████████████▏ | 11/26 [00:13<00:19, 1.28s/it]\u001b[A\n",
+ " 46%|███████████████████▊ | 12/26 [00:14<00:17, 1.28s/it]\u001b[A\n",
+ " 50%|█████████████████████▌ | 13/26 [00:15<00:16, 1.29s/it]\u001b[A\n",
+ " 54%|███████████████████████▏ | 14/26 [00:17<00:15, 1.30s/it]\u001b[A\n",
+ " 58%|████████████████████████▊ | 15/26 [00:18<00:14, 1.36s/it]\u001b[A\n",
+ " 62%|██████████████████████████▍ | 16/26 [00:19<00:13, 1.34s/it]\u001b[A\n",
+ " 65%|████████████████████████████ | 17/26 [00:21<00:12, 1.37s/it]\u001b[A\n",
+ " 69%|█████████████████████████████▊ | 18/26 [00:22<00:10, 1.31s/it]\u001b[A\n",
+ " 73%|███████████████████████████████▍ | 19/26 [00:23<00:09, 1.32s/it]\u001b[A\n",
+ " 77%|█████████████████████████████████ | 20/26 [00:25<00:07, 1.32s/it]\u001b[A\n",
+ " 81%|██████████████████████████████████▋ | 21/26 [00:26<00:06, 1.28s/it]\u001b[A\n",
+ " 85%|████████████████████████████████████▍ | 22/26 [00:27<00:05, 1.28s/it]\u001b[A\n",
+ " 88%|██████████████████████████████████████ | 23/26 [00:29<00:03, 1.31s/it]\u001b[A\n",
+ " 92%|███████████████████████████████████████▋ | 24/26 [00:30<00:02, 1.36s/it]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▎ | 25/26 [00:32<00:01, 1.38s/it]\u001b[A\n",
+ " \u001b[A\n",
+ " 35%|████████████▉ | 2000/5700 [54:42<1:44:34, 1.70s/it]\n",
+ "100%|███████████████████████████████████████████| 26/26 [00:32<00:00, 1.02s/it]\u001b[A\n",
+ " \u001b[ASaving model checkpoint to ./wav2vec2-large-xls-r-300m-chuvash/checkpoint-2000\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-chuvash/checkpoint-2000/config.json\n",
+ "Model weights saved in ./wav2vec2-large-xls-r-300m-chuvash/checkpoint-2000/pytorch_model.bin\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-chuvash/checkpoint-2000/preprocessor_config.json\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-chuvash/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-large-xls-r-300m-chuvash/checkpoint-1000] due to args.save_total_limit\n",
+ " 44%|███████████████▎ | 2500/5700 [1:08:22<1:24:59, 1.59s/it]The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 810\n",
+ " Batch size = 32\n",
+ "\n",
+ " 0%| | 0/26 [00:00, ?it/s]\u001b[A\n",
+ " 8%|███▍ | 2/26 [00:01<00:15, 1.52it/s]\u001b[A\n",
+ " 12%|█████ | 3/26 [00:02<00:22, 1.04it/s]\u001b[A\n",
+ " 15%|██████▊ | 4/26 [00:04<00:24, 1.13s/it]\u001b[A\n",
+ " 19%|████████▍ | 5/26 [00:05<00:25, 1.20s/it]\u001b[A\n",
+ " 23%|██████████▏ | 6/26 [00:06<00:25, 1.27s/it]\u001b[A\n",
+ " 27%|███████████▊ | 7/26 [00:08<00:24, 1.30s/it]\u001b[A\n",
+ " 31%|█████████████▌ | 8/26 [00:09<00:22, 1.26s/it]\u001b[A\n",
+ " 35%|███████████████▏ | 9/26 [00:10<00:22, 1.31s/it]\u001b[A\n",
+ " 38%|████████████████▌ | 10/26 [00:12<00:20, 1.29s/it]\u001b[A\n",
+ " 42%|██████████████████▏ | 11/26 [00:13<00:19, 1.31s/it]\u001b[A\n",
+ " 46%|███████████████████▊ | 12/26 [00:14<00:18, 1.30s/it]\u001b[A\n",
+ " 50%|█████████████████████▌ | 13/26 [00:16<00:16, 1.31s/it]\u001b[A\n",
+ " 54%|███████████████████████▏ | 14/26 [00:17<00:15, 1.30s/it]\u001b[A\n",
+ " 58%|████████████████████████▊ | 15/26 [00:18<00:14, 1.33s/it]\u001b[A\n",
+ " 62%|██████████████████████████▍ | 16/26 [00:20<00:13, 1.32s/it]\u001b[A\n",
+ " 65%|████████████████████████████ | 17/26 [00:21<00:12, 1.34s/it]\u001b[A\n",
+ " 69%|█████████████████████████████▊ | 18/26 [00:22<00:10, 1.30s/it]\u001b[A\n",
+ " 73%|███████████████████████████████▍ | 19/26 [00:23<00:09, 1.32s/it]\u001b[A\n",
+ " 77%|█████████████████████████████████ | 20/26 [00:25<00:07, 1.32s/it]\u001b[A\n",
+ " 81%|██████████████████████████████████▋ | 21/26 [00:26<00:06, 1.31s/it]\u001b[A\n",
+ " 85%|████████████████████████████████████▍ | 22/26 [00:27<00:05, 1.29s/it]\u001b[A\n",
+ " 88%|██████████████████████████████████████ | 23/26 [00:29<00:04, 1.35s/it]\u001b[A\n",
+ " 92%|███████████████████████████████████████▋ | 24/26 [00:30<00:02, 1.43s/it]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▎ | 25/26 [00:32<00:01, 1.44s/it]\u001b[A\n",
+ " \u001b[A\n",
+ " 44%|███████████████▎ | 2500/5700 [1:08:56<1:24:59, 1.59s/it]\n",
+ "100%|███████████████████████████████████████████| 26/26 [00:33<00:00, 1.06s/it]\u001b[A\n",
+ " \u001b[ASaving model checkpoint to ./wav2vec2-large-xls-r-300m-chuvash/checkpoint-2500\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-chuvash/checkpoint-2500/config.json\n",
+ "Model weights saved in ./wav2vec2-large-xls-r-300m-chuvash/checkpoint-2500/pytorch_model.bin\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-chuvash/checkpoint-2500/preprocessor_config.json\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-chuvash/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-large-xls-r-300m-chuvash/checkpoint-1500] due to args.save_total_limit\n",
+ " 53%|██████████████████▍ | 3000/5700 [1:22:33<1:08:14, 1.52s/it]The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 810\n",
+ " Batch size = 32\n",
+ "\n",
+ " 0%| | 0/26 [00:00, ?it/s]\u001b[A\n",
+ " 8%|███▍ | 2/26 [00:01<00:16, 1.50it/s]\u001b[A\n",
+ " 12%|█████ | 3/26 [00:02<00:22, 1.01it/s]\u001b[A\n",
+ " 15%|██████▊ | 4/26 [00:04<00:25, 1.15s/it]\u001b[A\n",
+ " 19%|████████▍ | 5/26 [00:05<00:26, 1.26s/it]\u001b[A\n",
+ " 23%|██████████▏ | 6/26 [00:06<00:25, 1.28s/it]\u001b[A\n",
+ " 27%|███████████▊ | 7/26 [00:08<00:24, 1.29s/it]\u001b[A\n",
+ " 31%|█████████████▌ | 8/26 [00:09<00:22, 1.25s/it]\u001b[A\n",
+ " 35%|███████████████▏ | 9/26 [00:10<00:22, 1.30s/it]\u001b[A\n",
+ " 38%|████████████████▌ | 10/26 [00:12<00:20, 1.28s/it]\u001b[A\n",
+ " 42%|██████████████████▏ | 11/26 [00:13<00:19, 1.28s/it]\u001b[A\n",
+ " 46%|███████████████████▊ | 12/26 [00:14<00:17, 1.27s/it]\u001b[A\n",
+ " 50%|█████████████████████▌ | 13/26 [00:15<00:16, 1.29s/it]\u001b[A\n",
+ " 54%|███████████████████████▏ | 14/26 [00:17<00:15, 1.28s/it]\u001b[A\n",
+ " 58%|████████████████████████▊ | 15/26 [00:18<00:14, 1.32s/it]\u001b[A\n",
+ " 62%|██████████████████████████▍ | 16/26 [00:19<00:13, 1.31s/it]\u001b[A\n",
+ " 65%|████████████████████████████ | 17/26 [00:21<00:11, 1.33s/it]\u001b[A\n",
+ " 69%|█████████████████████████████▊ | 18/26 [00:22<00:10, 1.28s/it]\u001b[A\n",
+ " 73%|███████████████████████████████▍ | 19/26 [00:23<00:09, 1.30s/it]\u001b[A\n",
+ " 77%|█████████████████████████████████ | 20/26 [00:25<00:07, 1.31s/it]\u001b[A\n",
+ " 81%|██████████████████████████████████▋ | 21/26 [00:26<00:06, 1.27s/it]\u001b[A\n",
+ " 85%|████████████████████████████████████▍ | 22/26 [00:27<00:05, 1.28s/it]\u001b[A\n",
+ " 88%|██████████████████████████████████████ | 23/26 [00:29<00:03, 1.31s/it]\u001b[A\n",
+ " 92%|███████████████████████████████████████▋ | 24/26 [00:30<00:02, 1.36s/it]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▎ | 25/26 [00:31<00:01, 1.38s/it]\u001b[A\n",
+ " \u001b[A\n",
+ " 53%|██████████████████▍ | 3000/5700 [1:23:06<1:08:14, 1.52s/it]\n",
+ "100%|███████████████████████████████████████████| 26/26 [00:32<00:00, 1.02s/it]\u001b[A\n",
+ " \u001b[ASaving model checkpoint to ./wav2vec2-large-xls-r-300m-chuvash/checkpoint-3000\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-chuvash/checkpoint-3000/config.json\n",
+ "Model weights saved in ./wav2vec2-large-xls-r-300m-chuvash/checkpoint-3000/pytorch_model.bin\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-chuvash/checkpoint-3000/preprocessor_config.json\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-chuvash/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-large-xls-r-300m-chuvash/checkpoint-2000] due to args.save_total_limit\n",
+ " 61%|██████████████████████▋ | 3500/5700 [1:36:37<51:25, 1.40s/it]The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 810\n",
+ " Batch size = 32\n",
+ "\n",
+ " 0%| | 0/26 [00:00, ?it/s]\u001b[A\n",
+ " 8%|███▍ | 2/26 [00:01<00:15, 1.52it/s]\u001b[A\n",
+ " 12%|█████ | 3/26 [00:02<00:22, 1.03it/s]\u001b[A\n",
+ " 15%|██████▊ | 4/26 [00:04<00:24, 1.13s/it]\u001b[A\n",
+ " 19%|████████▍ | 5/26 [00:05<00:25, 1.20s/it]\u001b[A\n",
+ " 23%|██████████▏ | 6/26 [00:06<00:24, 1.24s/it]\u001b[A\n",
+ " 27%|███████████▊ | 7/26 [00:08<00:24, 1.27s/it]\u001b[A\n",
+ " 31%|█████████████▌ | 8/26 [00:09<00:22, 1.24s/it]\u001b[A\n",
+ " 35%|███████████████▏ | 9/26 [00:10<00:21, 1.29s/it]\u001b[A\n",
+ " 38%|████████████████▌ | 10/26 [00:11<00:20, 1.28s/it]\u001b[A\n",
+ " 42%|██████████████████▏ | 11/26 [00:13<00:19, 1.28s/it]\u001b[A\n",
+ " 46%|███████████████████▊ | 12/26 [00:14<00:17, 1.27s/it]\u001b[A\n",
+ " 50%|█████████████████████▌ | 13/26 [00:15<00:16, 1.29s/it]\u001b[A\n",
+ " 54%|███████████████████████▏ | 14/26 [00:17<00:15, 1.29s/it]\u001b[A\n",
+ " 58%|████████████████████████▊ | 15/26 [00:18<00:14, 1.33s/it]\u001b[A\n",
+ " 62%|██████████████████████████▍ | 16/26 [00:19<00:13, 1.33s/it]\u001b[A\n",
+ " 65%|████████████████████████████ | 17/26 [00:21<00:12, 1.34s/it]\u001b[A\n",
+ " 69%|█████████████████████████████▊ | 18/26 [00:22<00:10, 1.29s/it]\u001b[A\n",
+ " 73%|███████████████████████████████▍ | 19/26 [00:23<00:09, 1.31s/it]\u001b[A\n",
+ " 77%|█████████████████████████████████ | 20/26 [00:25<00:07, 1.33s/it]\u001b[A\n",
+ " 81%|██████████████████████████████████▋ | 21/26 [00:26<00:06, 1.28s/it]\u001b[A\n",
+ " 85%|████████████████████████████████████▍ | 22/26 [00:27<00:05, 1.28s/it]\u001b[A\n",
+ " 88%|██████████████████████████████████████ | 23/26 [00:28<00:03, 1.32s/it]\u001b[A\n",
+ " 92%|███████████████████████████████████████▋ | 24/26 [00:30<00:02, 1.40s/it]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▎ | 25/26 [00:31<00:01, 1.41s/it]\u001b[A\n",
+ " \u001b[A\n",
+ " 61%|██████████████████████▋ | 3500/5700 [1:37:11<51:25, 1.40s/it]\n",
+ "100%|███████████████████████████████████████████| 26/26 [00:32<00:00, 1.04s/it]\u001b[A\n",
+ " \u001b[ASaving model checkpoint to ./wav2vec2-large-xls-r-300m-chuvash/checkpoint-3500\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-chuvash/checkpoint-3500/config.json\n",
+ "Model weights saved in ./wav2vec2-large-xls-r-300m-chuvash/checkpoint-3500/pytorch_model.bin\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-chuvash/checkpoint-3500/preprocessor_config.json\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-chuvash/preprocessor_config.json\n",
+ " 70%|█████████████████████████▉ | 4000/5700 [1:50:43<38:45, 1.37s/it]The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 810\n",
+ " Batch size = 32\n",
+ "\n",
+ " 0%| | 0/26 [00:00, ?it/s]\u001b[A\n",
+ " 8%|███▍ | 2/26 [00:01<00:15, 1.51it/s]\u001b[A\n",
+ " 12%|█████ | 3/26 [00:02<00:22, 1.03it/s]\u001b[A\n",
+ " 15%|██████▊ | 4/26 [00:04<00:24, 1.14s/it]\u001b[A\n",
+ " 19%|████████▍ | 5/26 [00:05<00:25, 1.21s/it]\u001b[A\n",
+ " 23%|██████████▏ | 6/26 [00:06<00:24, 1.25s/it]\u001b[A\n",
+ " 27%|███████████▊ | 7/26 [00:08<00:24, 1.27s/it]\u001b[A\n",
+ " 31%|█████████████▌ | 8/26 [00:09<00:22, 1.24s/it]\u001b[A\n",
+ " 35%|███████████████▏ | 9/26 [00:10<00:21, 1.29s/it]\u001b[A\n",
+ " 38%|████████████████▌ | 10/26 [00:11<00:20, 1.28s/it]\u001b[A\n",
+ " 42%|██████████████████▏ | 11/26 [00:13<00:19, 1.28s/it]\u001b[A\n",
+ " 46%|███████████████████▊ | 12/26 [00:14<00:17, 1.28s/it]\u001b[A\n",
+ " 50%|█████████████████████▌ | 13/26 [00:15<00:16, 1.29s/it]\u001b[A\n",
+ " 54%|███████████████████████▏ | 14/26 [00:17<00:15, 1.29s/it]\u001b[A\n",
+ " 58%|████████████████████████▊ | 15/26 [00:18<00:14, 1.33s/it]\u001b[A\n",
+ " 62%|██████████████████████████▍ | 16/26 [00:19<00:13, 1.32s/it]\u001b[A\n",
+ " 65%|████████████████████████████ | 17/26 [00:21<00:12, 1.33s/it]\u001b[A\n",
+ " 69%|█████████████████████████████▊ | 18/26 [00:22<00:10, 1.30s/it]\u001b[A\n",
+ " 73%|███████████████████████████████▍ | 19/26 [00:23<00:09, 1.33s/it]\u001b[A\n",
+ " 77%|█████████████████████████████████ | 20/26 [00:25<00:08, 1.35s/it]\u001b[A\n",
+ " 81%|██████████████████████████████████▋ | 21/26 [00:26<00:06, 1.29s/it]\u001b[A\n",
+ " 85%|████████████████████████████████████▍ | 22/26 [00:27<00:05, 1.28s/it]\u001b[A\n",
+ " 88%|██████████████████████████████████████ | 23/26 [00:28<00:03, 1.31s/it]\u001b[A\n",
+ " 92%|███████████████████████████████████████▋ | 24/26 [00:30<00:02, 1.35s/it]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▎ | 25/26 [00:31<00:01, 1.37s/it]\u001b[A\n",
+ " \u001b[A\n",
+ " 70%|█████████████████████████▉ | 4000/5700 [1:51:16<38:45, 1.37s/it]\n",
+ "100%|███████████████████████████████████████████| 26/26 [00:32<00:00, 1.02s/it]\u001b[A\n",
+ " \u001b[ASaving model checkpoint to ./wav2vec2-large-xls-r-300m-chuvash/checkpoint-4000\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-chuvash/checkpoint-4000/config.json\n",
+ "Model weights saved in ./wav2vec2-large-xls-r-300m-chuvash/checkpoint-4000/pytorch_model.bin\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-chuvash/checkpoint-4000/preprocessor_config.json\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-chuvash/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-large-xls-r-300m-chuvash/checkpoint-3000] due to args.save_total_limit\n",
+ " 79%|█████████████████████████████▏ | 4500/5700 [2:04:56<23:55, 1.20s/it]The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 810\n",
+ " Batch size = 32\n",
+ "\n",
+ " 0%| | 0/26 [00:00, ?it/s]\u001b[A\n",
+ " 8%|███▍ | 2/26 [00:01<00:16, 1.47it/s]\u001b[A\n",
+ " 12%|█████ | 3/26 [00:02<00:22, 1.02it/s]\u001b[A\n",
+ " 15%|██████▊ | 4/26 [00:04<00:25, 1.14s/it]\u001b[A\n",
+ " 19%|████████▍ | 5/26 [00:05<00:25, 1.21s/it]\u001b[A\n",
+ " 23%|██████████▏ | 6/26 [00:06<00:25, 1.26s/it]\u001b[A\n",
+ " 27%|███████████▊ | 7/26 [00:08<00:24, 1.28s/it]\u001b[A\n",
+ " 31%|█████████████▌ | 8/26 [00:09<00:22, 1.24s/it]\u001b[A\n",
+ " 35%|███████████████▏ | 9/26 [00:10<00:21, 1.29s/it]\u001b[A\n",
+ " 38%|████████████████▌ | 10/26 [00:12<00:20, 1.28s/it]\u001b[A\n",
+ " 42%|██████████████████▏ | 11/26 [00:13<00:19, 1.30s/it]\u001b[A\n",
+ " 46%|███████████████████▊ | 12/26 [00:14<00:18, 1.29s/it]\u001b[A\n",
+ " 50%|█████████████████████▌ | 13/26 [00:15<00:16, 1.30s/it]\u001b[A\n",
+ " 54%|███████████████████████▏ | 14/26 [00:17<00:15, 1.29s/it]\u001b[A\n",
+ " 58%|████████████████████████▊ | 15/26 [00:18<00:14, 1.33s/it]\u001b[A\n",
+ " 62%|██████████████████████████▍ | 16/26 [00:19<00:13, 1.33s/it]\u001b[A\n",
+ " 65%|████████████████████████████ | 17/26 [00:21<00:12, 1.33s/it]\u001b[A\n",
+ " 69%|█████████████████████████████▊ | 18/26 [00:22<00:10, 1.28s/it]\u001b[A\n",
+ " 73%|███████████████████████████████▍ | 19/26 [00:23<00:09, 1.30s/it]\u001b[A\n",
+ " 77%|█████████████████████████████████ | 20/26 [00:25<00:07, 1.31s/it]\u001b[A\n",
+ " 81%|██████████████████████████████████▋ | 21/26 [00:26<00:06, 1.26s/it]\u001b[A\n",
+ " 85%|████████████████████████████████████▍ | 22/26 [00:27<00:05, 1.26s/it]\u001b[A\n",
+ " 88%|██████████████████████████████████████ | 23/26 [00:28<00:03, 1.29s/it]\u001b[A\n",
+ " 92%|███████████████████████████████████████▋ | 24/26 [00:30<00:02, 1.34s/it]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▎ | 25/26 [00:31<00:01, 1.35s/it]\u001b[A\n",
+ " \u001b[A\n",
+ " 79%|█████████████████████████████▏ | 4500/5700 [2:05:30<23:55, 1.20s/it]\n",
+ "100%|███████████████████████████████████████████| 26/26 [00:32<00:00, 1.00s/it]\u001b[A\n",
+ " \u001b[ASaving model checkpoint to ./wav2vec2-large-xls-r-300m-chuvash/checkpoint-4500\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-chuvash/checkpoint-4500/config.json\n",
+ "Model weights saved in ./wav2vec2-large-xls-r-300m-chuvash/checkpoint-4500/pytorch_model.bin\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-chuvash/checkpoint-4500/preprocessor_config.json\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-chuvash/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-large-xls-r-300m-chuvash/checkpoint-3500] due to args.save_total_limit\n",
+ " 88%|████████████████████████████████▍ | 5000/5700 [2:19:06<13:12, 1.13s/it]The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 810\n",
+ " Batch size = 32\n",
+ "\n",
+ " 0%| | 0/26 [00:00, ?it/s]\u001b[A\n",
+ " 8%|███▍ | 2/26 [00:01<00:16, 1.49it/s]\u001b[A\n",
+ " 12%|█████ | 3/26 [00:02<00:22, 1.00it/s]\u001b[A\n",
+ " 15%|██████▊ | 4/26 [00:04<00:25, 1.15s/it]\u001b[A\n",
+ " 19%|████████▍ | 5/26 [00:05<00:25, 1.22s/it]\u001b[A\n",
+ " 23%|██████████▏ | 6/26 [00:06<00:25, 1.27s/it]\u001b[A\n",
+ " 27%|███████████▊ | 7/26 [00:08<00:24, 1.30s/it]\u001b[A\n",
+ " 31%|█████████████▌ | 8/26 [00:09<00:22, 1.26s/it]\u001b[A\n",
+ " 35%|███████████████▏ | 9/26 [00:10<00:22, 1.31s/it]\u001b[A\n",
+ " 38%|████████████████▌ | 10/26 [00:12<00:20, 1.31s/it]\u001b[A\n",
+ " 42%|██████████████████▏ | 11/26 [00:13<00:19, 1.31s/it]\u001b[A\n",
+ " 46%|███████████████████▊ | 12/26 [00:14<00:18, 1.32s/it]\u001b[A\n",
+ " 50%|█████████████████████▌ | 13/26 [00:16<00:17, 1.36s/it]\u001b[A\n",
+ " 54%|███████████████████████▏ | 14/26 [00:17<00:16, 1.35s/it]\u001b[A\n",
+ " 58%|████████████████████████▊ | 15/26 [00:19<00:15, 1.38s/it]\u001b[A\n",
+ " 62%|██████████████████████████▍ | 16/26 [00:20<00:13, 1.37s/it]\u001b[A\n",
+ " 65%|████████████████████████████ | 17/26 [00:21<00:12, 1.37s/it]\u001b[A\n",
+ " 69%|█████████████████████████████▊ | 18/26 [00:22<00:10, 1.30s/it]\u001b[A\n",
+ " 73%|███████████████████████████████▍ | 19/26 [00:24<00:09, 1.32s/it]\u001b[A\n",
+ " 77%|█████████████████████████████████ | 20/26 [00:25<00:08, 1.38s/it]\u001b[A\n",
+ " 81%|██████████████████████████████████▋ | 21/26 [00:27<00:06, 1.33s/it]\u001b[A\n",
+ " 85%|████████████████████████████████████▍ | 22/26 [00:28<00:05, 1.31s/it]\u001b[A\n",
+ " 88%|██████████████████████████████████████ | 23/26 [00:29<00:04, 1.36s/it]\u001b[A\n",
+ " 92%|███████████████████████████████████████▋ | 24/26 [00:31<00:02, 1.44s/it]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▎ | 25/26 [00:32<00:01, 1.43s/it]\u001b[A\n",
+ " \u001b[A\n",
+ " 88%|████████████████████████████████▍ | 5000/5700 [2:19:40<13:12, 1.13s/it]\n",
+ "100%|███████████████████████████████████████████| 26/26 [00:33<00:00, 1.06s/it]\u001b[A\n",
+ " \u001b[ASaving model checkpoint to ./wav2vec2-large-xls-r-300m-chuvash/checkpoint-5000\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-chuvash/checkpoint-5000/config.json\n",
+ "Model weights saved in ./wav2vec2-large-xls-r-300m-chuvash/checkpoint-5000/pytorch_model.bin\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-chuvash/checkpoint-5000/preprocessor_config.json\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-chuvash/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-large-xls-r-300m-chuvash/checkpoint-4000] due to args.save_total_limit\n",
+ " 96%|███████████████████████████████████▋ | 5500/5700 [2:33:19<03:32, 1.06s/it]The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 810\n",
+ " Batch size = 32\n",
+ "\n",
+ " 0%| | 0/26 [00:00, ?it/s]\u001b[A\n",
+ " 8%|███▍ | 2/26 [00:01<00:17, 1.41it/s]\u001b[A\n",
+ " 12%|█████ | 3/26 [00:02<00:22, 1.01it/s]\u001b[A\n",
+ " 15%|██████▊ | 4/26 [00:04<00:25, 1.14s/it]\u001b[A\n",
+ " 19%|████████▍ | 5/26 [00:05<00:25, 1.22s/it]\u001b[A\n",
+ " 23%|██████████▏ | 6/26 [00:06<00:25, 1.25s/it]\u001b[A\n",
+ " 27%|███████████▊ | 7/26 [00:08<00:25, 1.34s/it]\u001b[A\n",
+ " 31%|█████████████▌ | 8/26 [00:09<00:23, 1.29s/it]\u001b[A\n",
+ " 35%|███████████████▏ | 9/26 [00:11<00:22, 1.33s/it]\u001b[A\n",
+ " 38%|████████████████▌ | 10/26 [00:12<00:20, 1.31s/it]\u001b[A\n",
+ " 42%|██████████████████▏ | 11/26 [00:13<00:19, 1.30s/it]\u001b[A\n",
+ " 46%|███████████████████▊ | 12/26 [00:14<00:18, 1.34s/it]\u001b[A\n",
+ " 50%|█████████████████████▌ | 13/26 [00:16<00:17, 1.33s/it]\u001b[A\n",
+ " 54%|███████████████████████▏ | 14/26 [00:17<00:15, 1.31s/it]\u001b[A\n",
+ " 58%|████████████████████████▊ | 15/26 [00:19<00:14, 1.36s/it]\u001b[A\n",
+ " 62%|██████████████████████████▍ | 16/26 [00:20<00:13, 1.34s/it]\u001b[A\n",
+ " 65%|████████████████████████████ | 17/26 [00:21<00:12, 1.35s/it]\u001b[A\n",
+ " 69%|█████████████████████████████▊ | 18/26 [00:22<00:10, 1.29s/it]\u001b[A\n",
+ " 73%|███████████████████████████████▍ | 19/26 [00:24<00:09, 1.32s/it]\u001b[A\n",
+ " 77%|█████████████████████████████████ | 20/26 [00:25<00:08, 1.33s/it]\u001b[A\n",
+ " 81%|██████████████████████████████████▋ | 21/26 [00:26<00:06, 1.27s/it]\u001b[A\n",
+ " 85%|████████████████████████████████████▍ | 22/26 [00:28<00:05, 1.27s/it]\u001b[A\n",
+ " 88%|██████████████████████████████████████ | 23/26 [00:29<00:03, 1.31s/it]\u001b[A\n",
+ " 92%|███████████████████████████████████████▋ | 24/26 [00:30<00:02, 1.38s/it]\u001b[A\n",
+ " 96%|█████████████████████████████████████████▎ | 25/26 [00:32<00:01, 1.42s/it]\u001b[A\n",
+ " \u001b[A\n",
+ " 96%|███████████████████████████████████▋ | 5500/5700 [2:33:53<03:32, 1.06s/it]\n",
+ "100%|███████████████████████████████████████████| 26/26 [00:33<00:00, 1.05s/it]\u001b[A\n",
+ " \u001b[ASaving model checkpoint to ./wav2vec2-large-xls-r-300m-chuvash/checkpoint-5500\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-chuvash/checkpoint-5500/config.json\n",
+ "Model weights saved in ./wav2vec2-large-xls-r-300m-chuvash/checkpoint-5500/pytorch_model.bin\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-chuvash/checkpoint-5500/preprocessor_config.json\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-chuvash/preprocessor_config.json\n",
+ "Deleting older checkpoint [wav2vec2-large-xls-r-300m-chuvash/checkpoint-4500] due to args.save_total_limit\n",
+ "100%|█████████████████████████████████████| 5700/5700 [2:40:04<00:00, 1.32it/s]\n",
+ "\n",
+ "Training completed. Do not forget to share your model on huggingface.co/models =)\n",
+ "\n",
+ "\n",
+ "100%|█████████████████████████████████████| 5700/5700 [2:40:04<00:00, 1.68s/it]\n",
+ "Saving model checkpoint to ./wav2vec2-large-xls-r-300m-chuvash\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-chuvash/config.json\n",
+ "Model weights saved in ./wav2vec2-large-xls-r-300m-chuvash/pytorch_model.bin\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-chuvash/preprocessor_config.json\n",
+ "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
+ "***** Running Evaluation *****\n",
+ " Num examples = 810\n",
+ " Batch size = 32\n",
+ "100%|███████████████████████████████████████████| 26/26 [00:32<00:00, 1.25s/it]\n",
+ "Saving model checkpoint to ./wav2vec2-large-xls-r-300m-chuvash\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-chuvash/config.json\n",
+ "Model weights saved in ./wav2vec2-large-xls-r-300m-chuvash/pytorch_model.bin\n",
+ "Configuration saved in ./wav2vec2-large-xls-r-300m-chuvash/preprocessor_config.json\n",
+ "Upload file pytorch_model.bin: 99%|██████▉| 1.16G/1.18G [00:40<00:00, 31.2MB/s]To https://huggingface.co/infinitejoy/wav2vec2-large-xls-r-300m-chuvash\n",
+ " 71257a3..34ac9cc main -> main\n",
+ "\n",
+ "Upload file pytorch_model.bin: 100%|███████| 1.18G/1.18G [00:42<00:00, 29.8MB/s]\n",
+ "Dropping the following result as it does not have all the necessary fields:\n",
+ "{'dataset': {'name': 'MOZILLA-FOUNDATION/COMMON_VOICE_7_0 - CV', 'type': 'common_voice', 'args': 'Config: cv, Training split: train+validation, Eval split: test'}}\n",
+ "To https://huggingface.co/infinitejoy/wav2vec2-large-xls-r-300m-chuvash\n",
+ " 34ac9cc..1be96ec main -> main\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "!python run_speech_recognition_ctc.py \\\n",
+ "\t--dataset_name=\"mozilla-foundation/common_voice_7_0\" \\\n",
+ "\t--model_name_or_path=\"facebook/wav2vec2-xls-r-300m\" \\\n",
+ "\t--dataset_config_name=\"cv\" \\\n",
+ "\t--output_dir=\"./wav2vec2-large-xls-r-300m-chuvash\" \\\n",
+ "\t--overwrite_output_dir \\\n",
+ "\t--num_train_epochs=\"100\" \\\n",
+ "\t--per_device_train_batch_size=\"32\" \\\n",
+ "\t--per_device_eval_batch_size=\"32\" \\\n",
+ "\t--gradient_accumulation_steps=\"1\" \\\n",
+ "\t--learning_rate=\"3e-4\" \\\n",
+ "\t--warmup_steps=\"500\" \\\n",
+ "\t--length_column_name=\"input_length\" \\\n",
+ "\t--evaluation_strategy=\"steps\" \\\n",
+ "\t--text_column_name=\"sentence\" \\\n",
+ "\t--chars_to_ignore , ? . ! \\- \\; \\: \\\" “ % ‘ ” � — ’ … – \\\n",
+ "\t--save_steps=\"500\" \\\n",
+ "\t--eval_steps=\"500\" \\\n",
+ "\t--logging_steps=\"100\" \\\n",
+ "\t--layerdrop=\"0.0\" \\\n",
+ "\t--activation_dropout=\"0.1\" \\\n",
+ "\t--save_total_limit=\"2\" \\\n",
+ "\t--freeze_feature_encoder \\\n",
+ "\t--feat_proj_dropout=\"0.0\" \\\n",
+ "\t--mask_time_prob=\"0.75\" \\\n",
+ "\t--mask_time_length=\"10\" \\\n",
+ "\t--mask_feature_prob=\"0.25\" \\\n",
+ "\t--mask_feature_length=\"64\" \\\n",
+ "\t--gradient_checkpointing \\\n",
+ "\t--use_auth_token \\\n",
+ "\t--fp16 \\\n",
+ "\t--group_by_length \\\n",
+ "\t--do_train --do_eval \\\n",
+ " --push_to_hub > out.log"
+ ]
+ },
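+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The script above reports WER on the eval split. As an illustration of the metric itself (the strings below are made-up examples, not model output), the next cell computes WER with the `wer` metric from `datasets`."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Illustration only: WER on two made-up prediction/reference pairs.\n",
+ "from datasets import load_metric\n",
+ "\n",
+ "wer_metric = load_metric(\"wer\")\n",
+ "predictions = [\"the cat sat\", \"hello\"]\n",
+ "references = [\"the cat sat down\", \"hello\"]\n",
+ "# 1 deletion over 5 reference words -> 0.200\n",
+ "print(f\"WER: {wer_metric.compute(predictions=predictions, references=references):.3f}\")"
+ ]
+ },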
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# !rm -rf wav2vec2-large-xls-r-300m-bashkir"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!ls -ltr"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Filesystem Size Used Avail Use% Mounted on\n",
+ "overlay 3.5T 963G 2.4T 29% /\n",
+ "tmpfs 64M 0 64M 0% /dev\n",
+ "tmpfs 87G 0 87G 0% /sys/fs/cgroup\n",
+ "tmpfs 87G 8.0K 87G 1% /dev/shm\n",
+ "/dev/md0 3.5T 963G 2.4T 29% /etc/group\n",
+ "tmpfs 87G 12K 87G 1% /proc/driver/nvidia\n",
+ "/dev/vda1 49G 6.4G 42G 14% /usr/bin/nvidia-smi\n",
+ "udev 87G 0 87G 0% /dev/nvidia0\n",
+ "tmpfs 87G 0 87G 0% /proc/acpi\n",
+ "tmpfs 87G 0 87G 0% /proc/scsi\n",
+ "tmpfs 87G 0 87G 0% /sys/firmware\n"
+ ]
+ }
+ ],
+ "source": [
+ "!df -h"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Downloading and preparing dataset common_voice/cv to /workspace/.cache/huggingface/datasets/mozilla-foundation___common_voice/cv/7.0.0/33e08856cfa0d0665e837bcad73ffd920a0bc713ce8c5fffb55dbdf1c084d5ba...\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "d664c297fdc04696b86c7faddfa293f1",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Downloading: 0%| | 0.00/486M [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "0 examples [00:00, ? examples/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "0 examples [00:00, ? examples/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "0 examples [00:00, ? examples/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "0 examples [00:00, ? examples/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "0 examples [00:00, ? examples/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Dataset common_voice downloaded and prepared to /workspace/.cache/huggingface/datasets/mozilla-foundation___common_voice/cv/7.0.0/33e08856cfa0d0665e837bcad73ffd920a0bc713ce8c5fffb55dbdf1c084d5ba. Subsequent calls will reuse this data.\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Reusing dataset common_voice (/workspace/.cache/huggingface/datasets/mozilla-foundation___common_voice/cv/7.0.0/33e08856cfa0d0665e837bcad73ffd920a0bc713ce8c5fffb55dbdf1c084d5ba)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "1794\n"
+ ]
+ }
+ ],
+ "source": [
+ "from datasets import load_dataset, load_metric, Audio\n",
+ "\n",
+ "common_voice_train = load_dataset(\"mozilla-foundation/common_voice_7_0\", \"cv\", use_auth_token=True, split=\"train+validation\")\n",
+ "common_voice_test = load_dataset(\"mozilla-foundation/common_voice_7_0\", \"cv\", use_auth_token=True, split=\"test\")\n",
+ "\n",
+ "print(len(common_voice_train))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "5606.25"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "len(common_voice_train) * 100 / 32"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "common_voice_train = common_voice_train.remove_columns([\"accent\", \"age\", \"client_id\", \"down_votes\", \"gender\", \"locale\", \"segment\", \"up_votes\"])\n",
+ "common_voice_test = common_voice_test.remove_columns([\"accent\", \"age\", \"client_id\", \"down_votes\", \"gender\", \"locale\", \"segment\", \"up_votes\"])"
+ ]
+ },
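+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Illustrative sanity check (not part of the original run): after dropping the metadata columns, only `path`, `audio` and `sentence` should remain."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# expect ['path', 'audio', 'sentence']\n",
+ "print(common_voice_train.column_names)"
+ ]
+ },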
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from datasets import ClassLabel\n",
+ "import random\n",
+ "import pandas as pd\n",
+ "from IPython.display import display, HTML\n",
+ "\n",
+ "def show_random_elements(dataset, num_examples=10):\n",
+ " assert num_examples <= len(dataset), \"Can't pick more elements than there are in the dataset.\"\n",
+ " picks = []\n",
+ " for _ in range(num_examples):\n",
+ " pick = random.randint(0, len(dataset)-1)\n",
+ " while pick in picks:\n",
+ " pick = random.randint(0, len(dataset)-1)\n",
+ " picks.append(pick)\n",
+ " \n",
+ " df = pd.DataFrame(dataset[picks])\n",
+ " display(HTML(df.to_html()))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ " | \n",
+ " sentence | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Вӗри чейпе пӗрле икӗ булк�� параҫҫӗ. | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Ҫак тӗнчене ырӑрах, ҫутӑрах тӑвакансем кирлех. | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Тавара туяннине ӗнентерекен документсене упрамалла. | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Кӑҫал фильмсене Ҫӗмӗрле, Улатӑр хулисенчи кинотеатрӗсенче те кӑтартӗҫ. | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Чӑваш Енре чӑваш чӗлхи предмечӗ пирки хӗрсе калаҫаҫҫӗ. | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " Ун чухне ҫапла апатланайнӑ-ши? | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " Вӗсене тепӗр хут туясчӗ. | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " Занятисене пырӑр. | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " Шала кайнӑ чиртен сывалма кӑткӑс. | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " Вӗсене «Хӑрушсӑр тата пахалӑхлӑ ҫулсем» федераци программипе килӗшӳллӗн юсӗҫ. | \n",
+ "
\n",
+ " \n",
+ "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "show_random_elements(common_voice_train.remove_columns([\"path\", \"audio\"]), num_examples=10)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import re\n",
+ "chars_to_remove_regex = '[\\,\\?\\.\\!\\-\\;\\:\\\"\\“\\%\\‘\\”\\�\\—\\’\\…\\–]'\n",
+ "\n",
+ "def remove_special_characters(batch):\n",
+ " batch[\"sentence\"] = re.sub(chars_to_remove_regex, '', batch[\"sentence\"]).lower()\n",
+ " return batch"
+ ]
+ },
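+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Quick check of the cleaning regex (illustrative, not part of the original run) on a sentence that appears later in this notebook: the dash in the ignore list is stripped, while «» are kept and survive into the vocabulary."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Illustrative only: the en dash is removed, «» are left in place\n",
+ "sample = 'вещани концепцийӗ – «ирӗклӗ»'\n",
+ "print(re.sub(chars_to_remove_regex, '', sample).lower())"
+ ]
+ },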
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "38286f69f9bd4d0a9979d0e247d13463",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/1794 [00:00, ?ex/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "3e36f2e019754224bdb2ea26d6adc18a",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/810 [00:00, ?ex/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "common_voice_train = common_voice_train.map(remove_special_characters)\n",
+ "common_voice_test = common_voice_test.map(remove_special_characters)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Loading cached processed dataset at /workspace/.cache/huggingface/datasets/mozilla-foundation___common_voice/cv/7.0.0/33e08856cfa0d0665e837bcad73ffd920a0bc713ce8c5fffb55dbdf1c084d5ba/cache-06d8a0f73b389fef.arrow\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "{'path': 'cv-corpus-7.0-2021-07-21/cv/clips/common_voice_cv_18025754.mp3',\n",
+ " 'audio': {'path': 'cv-corpus-7.0-2021-07-21/cv/clips/common_voice_cv_18025754.mp3',\n",
+ " 'array': array([ 0. , 0. , 0. , ..., -0.00814039,\n",
+ " -0.00753635, -0.00696993], dtype=float32),\n",
+ " 'sampling_rate': 48000},\n",
+ " 'sentence': 'вещани концепцийӗ – «ирӗклӗ»'}"
+ ]
+ },
+ "execution_count": 26,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# start_with_ar = common_voice_train.filter(lambda example: '–' in example['sentence'])\n",
+ "# start_with_ar[0]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# start_with_ar"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def replace_hatted_characters(batch):\n",
+ "# batch[\"sentence\"] = re.sub('[â]', 'a', batch[\"sentence\"])\n",
+ "# batch[\"sentence\"] = re.sub('[î]', 'i', batch[\"sentence\"])\n",
+ "# batch[\"sentence\"] = re.sub('[ô]', 'o', batch[\"sentence\"])\n",
+ "# batch[\"sentence\"] = re.sub('[û]', 'u', batch[\"sentence\"])\n",
+ " return batch"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "62618c425072417fbcf8dabd8852283d",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/1794 [00:00, ?ex/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "954988c8cd0449f1940273af8068e159",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/810 [00:00, ?ex/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "common_voice_train = common_voice_train.map(replace_hatted_characters)\n",
+ "common_voice_test = common_voice_test.map(replace_hatted_characters)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def extract_all_chars(batch):\n",
+ " all_text = \" \".join(batch[\"sentence\"])\n",
+ " vocab = list(set(all_text))\n",
+ " return {\"vocab\": [vocab], \"all_text\": [all_text]}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "cebc0088426f46ce87792998a3a3f666",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/1 [00:00, ?ba/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "2d8bf33170024d9c9d27e708f5802787",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/1 [00:00, ?ba/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "vocab_train = common_voice_train.map(extract_all_chars, batched=True, batch_size=-1, keep_in_memory=True, remove_columns=common_voice_train.column_names)\n",
+ "vocab_test = common_voice_test.map(extract_all_chars, batched=True, batch_size=-1, keep_in_memory=True, remove_columns=common_voice_test.column_names)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "vocab_list = list(set(vocab_train[\"vocab\"][0]) | set(vocab_test[\"vocab\"][0]))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{' ': 0,\n",
+ " '«': 1,\n",
+ " '»': 2,\n",
+ " 'ç': 3,\n",
+ " 'ă': 4,\n",
+ " 'ĕ': 5,\n",
+ " 'а': 6,\n",
+ " 'б': 7,\n",
+ " 'в': 8,\n",
+ " 'г': 9,\n",
+ " 'д': 10,\n",
+ " 'е': 11,\n",
+ " 'ж': 12,\n",
+ " 'з': 13,\n",
+ " 'и': 14,\n",
+ " 'й': 15,\n",
+ " 'к': 16,\n",
+ " 'л': 17,\n",
+ " 'м': 18,\n",
+ " 'н': 19,\n",
+ " 'о': 20,\n",
+ " 'п': 21,\n",
+ " 'р': 22,\n",
+ " 'с': 23,\n",
+ " 'т': 24,\n",
+ " 'у': 25,\n",
+ " 'ф': 26,\n",
+ " 'х': 27,\n",
+ " 'ц': 28,\n",
+ " 'ч': 29,\n",
+ " 'ш': 30,\n",
+ " 'щ': 31,\n",
+ " 'ъ': 32,\n",
+ " 'ы': 33,\n",
+ " 'ь': 34,\n",
+ " 'э': 35,\n",
+ " 'ю': 36,\n",
+ " 'я': 37,\n",
+ " 'ҫ': 38,\n",
+ " 'ӑ': 39,\n",
+ " 'ӗ': 40,\n",
+ " 'ӳ': 41}"
+ ]
+ },
+ "execution_count": 36,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "vocab_dict = {v: k for k, v in enumerate(sorted(vocab_list))}\n",
+ "vocab_dict"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "file ./config.json not found\n",
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "44\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/opt/conda/lib/python3.8/site-packages/huggingface_hub/hf_api.py:1001: FutureWarning: `create_repo` now takes `token` as an optional positional argument. Be sure to adapt your code!\n",
+ " warnings.warn(\n",
+ "Cloning https://huggingface.co/infinitejoy/wav2vec2-large-xls-r-300m-chuvash into local empty directory.\n",
+ "To https://huggingface.co/infinitejoy/wav2vec2-large-xls-r-300m-chuvash\n",
+ " b5a5120..f0fd0b1 main -> main\n",
+ "\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "'https://huggingface.co/infinitejoy/wav2vec2-large-xls-r-300m-chuvash/commit/f0fd0b1a9a8b5065f7c708f6633b736246777617'"
+ ]
+ },
+ "execution_count": 37,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "vocab_dict[\"|\"] = vocab_dict[\" \"]\n",
+ "del vocab_dict[\" \"]\n",
+ "\n",
+ "vocab_dict[\"[UNK]\"] = len(vocab_dict)\n",
+ "vocab_dict[\"[PAD]\"] = len(vocab_dict)\n",
+ "print(len(vocab_dict))\n",
+ "\n",
+ "import json\n",
+ "with open('./vocab.json', 'w') as vocab_file:\n",
+ " json.dump(vocab_dict, vocab_file)\n",
+ " \n",
+ "from transformers import Wav2Vec2CTCTokenizer\n",
+ "\n",
+ "tokenizer = Wav2Vec2CTCTokenizer.from_pretrained(\"./\", unk_token=\"[UNK]\", pad_token=\"[PAD]\", word_delimiter_token=\"|\")\n",
+ "\n",
+ "repo_name = \"wav2vec2-large-xls-r-300m-chuvash\"\n",
+ "\n",
+ "tokenizer.push_to_hub(repo_name)"
+ ]
+ },
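+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For reference, a minimal sketch of how the tokenizer above pairs with a feature extractor into a single processor. This mirrors what `run_speech_recognition_ctc.py` assembles internally; the extractor settings here are the stock XLS-R values, assumed rather than read back from the training run."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2Processor\n",
+ "\n",
+ "# stock XLS-R feature-extractor settings (assumed, not taken from the run)\n",
+ "feature_extractor = Wav2Vec2FeatureExtractor(\n",
+ "    feature_size=1, sampling_rate=16_000, padding_value=0.0,\n",
+ "    do_normalize=True, return_attention_mask=True)\n",
+ "processor = Wav2Vec2Processor(feature_extractor=feature_extractor, tokenizer=tokenizer)"
+ ]
+ },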
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "--2022-01-25 05:51:53-- https://raw.githubusercontent.com/huggingface/transformers/master/examples/research_projects/robust-speech-event/eval.py\n",
+ "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.110.133, 185.199.111.133, ...\n",
+ "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.\n",
+ "HTTP request sent, awaiting response... 200 OK\n",
+ "Length: 4421 (4.3K) [text/plain]\n",
+ "Saving to: ‘eval.py’\n",
+ "\n",
+ "eval.py 100%[===================>] 4.32K --.-KB/s in 0s \n",
+ "\n",
+ "2022-01-25 05:51:53 (11.6 MB/s) - ‘eval.py’ saved [4421/4421]\n",
+ "\n",
+ "total 1232556\n",
+ "-rw-r--r-- 1 ovh ovh 272 Jan 25 02:49 vocab.json\n",
+ "-rw-r--r-- 1 ovh ovh 260 Jan 25 02:49 tokenizer_config.json\n",
+ "-rw-r--r-- 1 ovh ovh 309 Jan 25 02:49 special_tokens_map.json\n",
+ "-rw-r--r-- 1 ovh ovh 23 Jan 25 02:49 added_tokens.json\n",
+ "drwxr-xr-x 2 ovh ovh 4096 Jan 25 05:21 checkpoint-5500\n",
+ "drwxr-xr-x 2 ovh ovh 4096 Jan 25 05:35 checkpoint-6000\n",
+ "-rw-r--r-- 1 ovh ovh 197 Jan 25 05:46 train_results.json\n",
+ "-rw-r--r-- 1 ovh ovh 11278 Jan 25 05:46 trainer_state.json\n",
+ "-rw-r--r-- 1 ovh ovh 224 Jan 25 05:46 eval_results.json\n",
+ "-rw-r--r-- 1 ovh ovh 2033 Jan 25 05:46 config.json\n",
+ "-rw-r--r-- 1 ovh ovh 399 Jan 25 05:46 all_results.json\n",
+ "-rw-r--r-- 1 ovh ovh 1262058993 Jan 25 05:46 pytorch_model.bin\n",
+ "-rw-r--r-- 1 ovh ovh 3055 Jan 25 05:46 training_args.bin\n",
+ "-rw-r--r-- 1 ovh ovh 212 Jan 25 05:46 preprocessor_config.json\n",
+ "-rw-r--r-- 1 ovh ovh 2253 Jan 25 05:49 README.md\n",
+ "-rw-r--r-- 1 ovh ovh 4421 Jan 25 05:51 eval.py\n"
+ ]
+ }
+ ],
+ "source": [
+ "!wget -O eval.py https://raw.githubusercontent.com/huggingface/transformers/master/examples/research_projects/robust-speech-event/eval.py\n",
+ "!cp eval.py wav2vec2-large-xls-r-300m-chuvash\n",
+ "!ls -ltr wav2vec2-large-xls-r-300m-chuvash"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Reusing dataset common_voice (/workspace/.cache/huggingface/datasets/mozilla-foundation___common_voice/bas/7.0.0/33e08856cfa0d0665e837bcad73ffd920a0bc713ce8c5fffb55dbdf1c084d5ba)\n",
+ "100%|█████████████████████████████████████████| 375/375 [03:03<00:00, 2.04ex/s]\n",
+ "WER: 1.0408274360370169\n",
+ "CER: 2.2848350566223536\n",
+ "100%|██████████████████████████████████████| 375/375 [00:00<00:00, 20474.93ex/s]\n"
+ ]
+ }
+ ],
+ "source": [
+ "!cd wav2vec2-large-xls-r-300m-chuvash; python eval.py --model_id ./ --dataset mozilla-foundation/common_voice_7_0 --config cv --split test --log_outputs"
+ ]
+ },
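+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "A WER above 1.0, as printed above, is possible because WER = (substitutions + deletions + insertions) / reference length, so heavy insertion errors push the score past 1. A toy illustration with made-up strings:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from datasets import load_metric\n",
+ "\n",
+ "# toy strings only: 2 reference words, 3 inserted words -> WER 3/2 = 1.5\n",
+ "wer_metric = load_metric(\"wer\")\n",
+ "print(wer_metric.compute(predictions=[\"а б в г д\"], references=[\"а б\"]))"
+ ]
+ },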
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "24592b0be30e4eafb1949cf09d1c4fb4",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Downloading: 0%| | 0.00/260 [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "f9bf2ab0d2fa4d3f9235cc6d1ab772f1",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Downloading: 0%| | 0.00/574 [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "b0791474a34043da8057e06741472ade",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Downloading: 0%| | 0.00/23.0 [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "1ccbd582d616458b87c76ac8dc5b6b36",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Downloading: 0%| | 0.00/309 [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# from transformers import AutoModelForCTC, Wav2Vec2Processor\n",
+ "\n",
+ "# model = AutoModelForCTC.from_pretrained(\"infinitejoy/wav2vec2-large-xls-r-300m-bashkir\")\n",
+ "# processor = Wav2Vec2Processor.from_pretrained(\"infinitejoy/wav2vec2-large-xls-r-300m-bashkir\")\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "013fabff2ea243a0a728a79b8f54ae09",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Downloading: 0%| | 0.00/1.99k [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "a8d9ca6d024f46f58301bfbcc475e41a",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Downloading: 0%| | 0.00/1.18G [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "b336e2647c05466d87a11dfa326e30d6",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Downloading: 0%| | 0.00/212 [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "8e6962320ad944439261482617be4869",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Downloading: 0%| | 0.00/260 [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "99de2ef750aa49fd986965d66853a5ea",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Downloading: 0%| | 0.00/520 [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "765670f93e5f4c2e849c98d53e616f38",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Downloading: 0%| | 0.00/23.0 [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "812abafc8f6b49e3a498718d034a379b",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Downloading: 0%| | 0.00/309 [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "ename": "AssertionError",
+ "evalue": "55",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)",
+ "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0mlogits\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput_values\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlogits\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 12\u001b[0;31m \u001b[0;32massert\u001b[0m \u001b[0mlogits\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m32\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlogits\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+ "\u001b[0;31mAssertionError\u001b[0m: 55"
+ ]
+ }
+ ],
+ "source": [
+ "# from transformers import AutoModelForCTC, AutoProcessor\n",
+ "# from datasets import load_dataset\n",
+ "\n",
+ "# model = AutoModelForCTC.from_pretrained(\"infinitejoy/wav2vec2-large-xls-r-300m-bashkir\")\n",
+ "# processor = AutoProcessor.from_pretrained(\"infinitejoy/wav2vec2-large-xls-r-300m-bashkir\")\n",
+ "\n",
+ "# input_values = processor(common_voice_test[0][\"audio\"][\"array\"], return_tensors=\"pt\", sampling_rate=16_000).input_values\n",
+ "# # input_values = input_values.to(\"cuda\")\n",
+ "\n",
+ "# logits = model(input_values).logits\n",
+ "\n",
+ "# assert logits.shape[-1] == 32, logits.shape[-1]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Reusing dataset common_voice (/workspace/.cache/huggingface/datasets/mozilla-foundation___common_voice/cv/7.0.0/33e08856cfa0d0665e837bcad73ffd920a0bc713ce8c5fffb55dbdf1c084d5ba)\n"
+ ]
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "77ddec247efe46658e96cce814c59016",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Downloading: 0%| | 0.00/1.99k [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "5aaf8d9a09ac47e4838688a3c456934d",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Downloading: 0%| | 0.00/212 [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "32b0268554f04ba0b37e499882579624",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Downloading: 0%| | 0.00/1.18G [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "b4626ee1310a40edaa7ac7d91a4fb7dd",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Downloading: 0%| | 0.00/293 [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "ea454aed01d6461a9b7dd91308c6d455",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Downloading: 0%| | 0.00/435 [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "f1b9f2736f4e43d49143ed0bccc765d7",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Downloading: 0%| | 0.00/23.0 [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "82e09f6ae3b844ed904cba2029899d3c",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Downloading: 0%| | 0.00/502 [00:00, ?B/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "d5c34d06c5ed48408e32e7c5bb80ade6",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ " 0%| | 0/10 [00:00, ?ex/s]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['аса илтеретпӗр вӑл кашни ҫулах иртет чӑвашсем те унта тӑтӑшах хутшӑнаҫҫӗ', 'вӑл ун чухне ытти ӳрешарана та шалӑвне пӗчӗклетме сӗннӗччӗ', 'проблемине историпе ҫыхӑнтарма пӗлмелле хамӑр истори тӑрӑх шухӑшламалла', 'валерий питӗ ҫамрӑк пулнӑ', 'чӑвашсен килҫурчӗ уҫӑлчӗ ӗҫлет', 'шовинистсем таптанине чӑтма патша саманинчех хӑнӑхнӑ', 'хайхискерсем хӑйсене шырани пирки шухӑшламан та', 'уяв хӗрлӗ лапамра иртнӗ', 'куншӑн штрафлаҫҫӗ', 'хӑй эрех сыпкаланӑ пулсан та руль умне ларнӑ']\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "'Аса илтеретпӗр, вӑл кашни ҫулах иртет, чӑвашсем те унта тӑтӑшах хутшӑнаҫҫӗ.'"
+ ]
+ },
+ "execution_count": 42,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from datasets import Audio, Dataset, load_dataset, load_metric\n",
+ "from transformers import AutoFeatureExtractor, pipeline\n",
+ "\n",
+ "dataset = load_dataset(\"mozilla-foundation/common_voice_7_0\", \"cv\", use_auth_token=True, split=\"train+validation\")\n",
+ "\n",
+ "# for testing: only process the first two examples as a test\n",
+ "dataset = dataset.select(range(10))\n",
+ "\n",
+ "repo_name = 'infinitejoy/wav2vec2-large-xls-r-300m-chuvash'\n",
+ "\n",
+ "# load processor\n",
+ "feature_extractor = AutoFeatureExtractor.from_pretrained(repo_name)\n",
+ "# feature_extractor = processor_with_lm.feature_extractor\n",
+ "sampling_rate = feature_extractor.sampling_rate\n",
+ "\n",
+ "# resample audio\n",
+ "dataset = dataset.cast_column(\"audio\", Audio(sampling_rate=sampling_rate))\n",
+ "\n",
+ "# load eval pipeline\n",
+ "asr = pipeline(\"automatic-speech-recognition\", model=repo_name, feature_extractor=feature_extractor)\n",
+ "\n",
+ "# map function to decode audio\n",
+ "def map_to_pred(batch):\n",
+ " prediction = asr(\n",
+ " batch[\"audio\"][\"array\"])\n",
+ "\n",
+ " batch[\"prediction\"] = prediction[\"text\"]\n",
+ " batch[\"target\"] = batch[\"sentence\"]\n",
+ " return batch\n",
+ "\n",
+ "# run inference on all examples\n",
+ "result = dataset.map(map_to_pred, remove_columns=dataset.column_names)\n",
+ "print(result[\"prediction\"])\n",
+ "\n",
+ "result[0]['target']"
+ ]
+ },
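+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "To put numbers on the 10-example sample above, a minimal sketch using `load_metric` (already imported in that cell). Note the targets here are the raw sentences, so punctuation and casing inflate both scores relative to the cleaned-text evaluation."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Sketch: score the sample decoded above (both metrics require jiwer)\n",
+ "wer_metric = load_metric(\"wer\")\n",
+ "cer_metric = load_metric(\"cer\")\n",
+ "print(\"WER:\", wer_metric.compute(predictions=result[\"prediction\"], references=result[\"target\"]))\n",
+ "print(\"CER:\", cer_metric.compute(predictions=result[\"prediction\"], references=result[\"target\"]))"
+ ]
+ },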
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "authorship_tag": "ABX9TyM3OaMlm9YQtKpl28c8gBBd",
+ "include_colab_link": true,
+ "name": "DebugOVHTransformers.ipynb",
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.8"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}