File size: 28,439 Bytes

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# HuggingFace challenge - Debugger notebook\n",
    "Run this notebook to verify your library versions, check your GPU configuration, and run a quick training job."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "id": "T2utsYSKszvv"
   },
   "outputs": [],
   "source": [
    "import platform\n",
    "import multiprocessing\n",
    "\n",
    "import torch\n",
    "import transformers\n",
    "import datasets\n",
    "\n",
    "import soundfile"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Print main information"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Check your GPU information (if any)\n",
    "If you launched an AI Training job with GPU resources, they should be listed below (Tesla V100s 32GB).\n",
    "The driver and CUDA versions should be shown as well."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "TorMtpwPv6RQ"
   },
   "source": [
    "## Quick training run with a dummy model and data\n",
    "More information: https://github.com/huggingface/transformers/tree/master/examples/pytorch/speech-recognition"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "fevoJD15u4Ss",
    "outputId": "5861d34e-745b-45ee-e780-ed363043e655"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--2022-01-31 17:09:10--  https://raw.githubusercontent.com/huggingface/transformers/master/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py\n",
      "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.108.133, 185.199.110.133, ...\n",
      "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.\n",
      "HTTP request sent, awaiting response... 200 OK\n",
      "Length: 30360 (30K) [text/plain]\n",
      "Saving to: ‘run_speech_recognition_ctc.py’\n",
      "\n",
      "run_speech_recognit 100%[===================>]  29.65K  --.-KB/s    in 0.001s  \n",
      "\n",
      "2022-01-31 17:09:10 (55.6 MB/s) - ‘run_speech_recognition_ctc.py’ saved [30360/30360]\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Download the CTC speech-recognition example script from the transformers repository.\n",
    "!wget --output-document=run_speech_recognition_ctc.py https://raw.githubusercontent.com/huggingface/transformers/master/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "Mz4bubhxxsad",
    "outputId": "23398525-cc19-43c2-9fec-497e06214f29"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "01/31/2022 17:10:15 - WARNING - __main__ - Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: True\n",
      "01/31/2022 17:10:15 - INFO - __main__ - Training/evaluation parameters TrainingArguments(\n",
      "_n_gpu=1,\n",
      "adafactor=False,\n",
      "adam_beta1=0.9,\n",
      "adam_beta2=0.999,\n",
      "adam_epsilon=1e-08,\n",
      "bf16=False,\n",
      "bf16_full_eval=False,\n",
      "dataloader_drop_last=False,\n",
      "dataloader_num_workers=0,\n",
      "dataloader_pin_memory=True,\n",
      "ddp_bucket_cap_mb=None,\n",
      "ddp_find_unused_parameters=None,\n",
      "debug=[],\n",
      "deepspeed=None,\n",
      "disable_tqdm=False,\n",
      "do_eval=True,\n",
      "do_predict=False,\n",
      "do_train=True,\n",
      "eval_accumulation_steps=None,\n",
      "eval_steps=500,\n",
      "evaluation_strategy=IntervalStrategy.STEPS,\n",
      "fp16=True,\n",
      "fp16_backend=auto,\n",
      "fp16_full_eval=False,\n",
      "fp16_opt_level=O1,\n",
      "gradient_accumulation_steps=1,\n",
      "gradient_checkpointing=True,\n",
      "greater_is_better=None,\n",
      "group_by_length=True,\n",
      "half_precision_backend=auto,\n",
      "hub_model_id=None,\n",
      "hub_strategy=HubStrategy.EVERY_SAVE,\n",
      "hub_token=<HUB_TOKEN>,\n",
      "ignore_data_skip=False,\n",
      "label_names=None,\n",
      "label_smoothing_factor=0.0,\n",
      "learning_rate=0.0003,\n",
      "length_column_name=input_length,\n",
      "load_best_model_at_end=False,\n",
      "local_rank=-1,\n",
      "log_level=-1,\n",
      "log_level_replica=-1,\n",
      "log_on_each_node=True,\n",
      "logging_dir=./runs/Jan31_17-10-15_job-6a6be32c-c82d-4385-805b-1f7606124d5b,\n",
      "logging_first_step=False,\n",
      "logging_nan_inf_filter=True,\n",
      "logging_steps=500,\n",
      "logging_strategy=IntervalStrategy.STEPS,\n",
      "lr_scheduler_type=SchedulerType.LINEAR,\n",
      "max_grad_norm=1.0,\n",
      "max_steps=10,\n",
      "metric_for_best_model=None,\n",
      "mp_parameters=,\n",
      "no_cuda=False,\n",
      "num_train_epochs=3.0,\n",
      "optim=OptimizerNames.ADAMW_HF,\n",
      "output_dir=./,\n",
      "overwrite_output_dir=True,\n",
      "past_index=-1,\n",
      "per_device_eval_batch_size=8,\n",
      "per_device_train_batch_size=2,\n",
      "prediction_loss_only=False,\n",
      "push_to_hub=True,\n",
      "push_to_hub_model_id=None,\n",
      "push_to_hub_organization=None,\n",
      "push_to_hub_token=<PUSH_TO_HUB_TOKEN>,\n",
      "remove_unused_columns=True,\n",
      "report_to=[],\n",
      "resume_from_checkpoint=None,\n",
      "run_name=./,\n",
      "save_on_each_node=False,\n",
      "save_steps=5,\n",
      "save_strategy=IntervalStrategy.STEPS,\n",
      "save_total_limit=1,\n",
      "seed=42,\n",
      "sharded_ddp=[],\n",
      "skip_memory_metrics=True,\n",
      "tf32=None,\n",
      "tpu_metrics_debug=False,\n",
      "tpu_num_cores=None,\n",
      "use_legacy_prediction_loop=False,\n",
      "warmup_ratio=0.0,\n",
      "warmup_steps=0,\n",
      "weight_decay=0.0,\n",
      "xpu_backend=None,\n",
      ")\n",
      "Downloading: 100%|█████████████████████████| 10.1k/10.1k [00:00<00:00, 7.28MB/s]\n",
      "Downloading: 100%|█████████████████████████| 2.98k/2.98k [00:00<00:00, 3.39MB/s]\n",
      "Downloading: 100%|██████████████████████████| 53.1k/53.1k [00:00<00:00, 325kB/s]\n",
      "Downloading and preparing dataset common_voice/ab to /workspace/.cache/huggingface/datasets/mozilla-foundation___common_voice/ab/8.0.0/b8bc4d453193c06a43269b46cd87f075c70f152ac963b7f28f7a2760c45ec3e8...\n",
      "Downloading: 100%|█████████████████████████| 1.72G/1.72G [01:31<00:00, 18.8MB/s]\n",
      "Dataset common_voice downloaded and prepared to /workspace/.cache/huggingface/datasets/mozilla-foundation___common_voice/ab/8.0.0/b8bc4d453193c06a43269b46cd87f075c70f152ac963b7f28f7a2760c45ec3e8. Subsequent calls will reuse this data.\n",
      "01/31/2022 17:13:15 - WARNING - datasets.builder - Reusing dataset common_voice (/workspace/.cache/huggingface/datasets/mozilla-foundation___common_voice/ab/8.0.0/b8bc4d453193c06a43269b46cd87f075c70f152ac963b7f28f7a2760c45ec3e8)\n",
      "remove special characters from datasets: 30002ex [00:05, 5673.36ex/s]\n",
      "remove special characters from datasets: 9184ex [00:01, 5662.70ex/s]\n",
      "loading configuration file https://huggingface.co/hf-test/xls-r-dummy/resolve/main/config.json from cache at /workspace/.cache/huggingface/transformers/8157526a5096028eb61c63d228d882e5437edef5cb8b1a033ae35bf6249d1568.80b921aeb31bf1fa045a15aafa0e6f7e2ac68d338c1d83a3c76c99e260b22a62\n",
      "Model config Wav2Vec2Config {\n",
      "  \"_name_or_path\": \"hf-test/xls-r-dummy\",\n",
      "  \"activation_dropout\": 0.1,\n",
      "  \"adapter_kernel_size\": 3,\n",
      "  \"adapter_stride\": 2,\n",
      "  \"add_adapter\": false,\n",
      "  \"apply_spec_augment\": true,\n",
      "  \"architectures\": [\n",
      "    \"Wav2Vec2Model\"\n",
      "  ],\n",
      "  \"attention_dropout\": 0.1,\n",
      "  \"bos_token_id\": 1,\n",
      "  \"classifier_proj_size\": 256,\n",
      "  \"codevector_dim\": 256,\n",
      "  \"contrastive_logits_temperature\": 0.1,\n",
      "  \"conv_bias\": false,\n",
      "  \"conv_dim\": [\n",
      "    32,\n",
      "    32,\n",
      "    32\n",
      "  ],\n",
      "  \"conv_kernel\": [\n",
      "    8,\n",
      "    8,\n",
      "    8\n",
      "  ],\n",
      "  \"conv_stride\": [\n",
      "    4,\n",
      "    4,\n",
      "    4\n",
      "  ],\n",
      "  \"ctc_loss_reduction\": \"sum\",\n",
      "  \"ctc_zero_infinity\": false,\n",
      "  \"diversity_loss_weight\": 0.1,\n",
      "  \"do_stable_layer_norm\": true,\n",
      "  \"eos_token_id\": 2,\n",
      "  \"feat_extract_activation\": \"gelu\",\n",
      "  \"feat_extract_dropout\": 0.0,\n",
      "  \"feat_extract_norm\": \"layer\",\n",
      "  \"feat_proj_dropout\": 0.1,\n",
      "  \"feat_quantizer_dropout\": 0.0,\n",
      "  \"final_dropout\": 0.1,\n",
      "  \"gradient_checkpointing\": false,\n",
      "  \"hidden_act\": \"gelu\",\n",
      "  \"hidden_dropout\": 0.1,\n",
      "  \"hidden_dropout_prob\": 0.1,\n",
      "  \"hidden_size\": 16,\n",
      "  \"initializer_range\": 0.02,\n",
      "  \"intermediate_size\": 20,\n",
      "  \"layer_norm_eps\": 1e-05,\n",
      "  \"layerdrop\": 0.1,\n",
      "  \"mask_feature_length\": 10,\n",
      "  \"mask_feature_min_masks\": 0,\n",
      "  \"mask_feature_prob\": 0.0,\n",
      "  \"mask_time_length\": 10,\n",
      "  \"mask_time_min_masks\": 2,\n",
      "  \"mask_time_prob\": 0.05,\n",
      "  \"model_type\": \"wav2vec2\",\n",
      "  \"num_adapter_layers\": 3,\n",
      "  \"num_attention_heads\": 2,\n",
      "  \"num_codevector_groups\": 2,\n",
      "  \"num_codevectors_per_group\": 320,\n",
      "  \"num_conv_pos_embedding_groups\": 2,\n",
      "  \"num_conv_pos_embeddings\": 16,\n",
      "  \"num_feat_extract_layers\": 3,\n",
      "  \"num_hidden_layers\": 4,\n",
      "  \"num_negatives\": 10,\n",
      "  \"output_hidden_size\": 16,\n",
      "  \"pad_token_id\": 0,\n",
      "  \"proj_codevector_dim\": 256,\n",
      "  \"tdnn_dilation\": [\n",
      "    1,\n",
      "    2,\n",
      "    3,\n",
      "    1,\n",
      "    1\n",
      "  ],\n",
      "  \"tdnn_dim\": [\n",
      "    512,\n",
      "    512,\n",
      "    512,\n",
      "    512,\n",
      "    1500\n",
      "  ],\n",
      "  \"tdnn_kernel\": [\n",
      "    5,\n",
      "    3,\n",
      "    3,\n",
      "    1,\n",
      "    1\n",
      "  ],\n",
      "  \"torch_dtype\": \"float32\",\n",
      "  \"transformers_version\": \"4.17.0.dev0\",\n",
      "  \"use_weighted_layer_sum\": false,\n",
      "  \"vocab_size\": 32,\n",
      "  \"xvector_output_dim\": 512\n",
      "}\n",
      "\n",
      "100%|█████████████████████████████████████████████| 1/1 [00:00<00:00,  1.03ba/s]\n",
      "100%|█████████████████████████████████████████████| 1/1 [00:00<00:00,  4.09ba/s]\n",
      "Didn't find file ./tokenizer_config.json. We won't load it.\n",
      "Didn't find file ./added_tokens.json. We won't load it.\n",
      "Didn't find file ./special_tokens_map.json. We won't load it.\n",
      "Didn't find file ./tokenizer.json. We won't load it.\n",
      "loading file ./vocab.json\n",
      "loading file None\n",
      "loading file None\n",
      "loading file None\n",
      "loading file None\n",
      "file ./config.json not found\n",
      "Adding <s> to the vocabulary\n",
      "Adding </s> to the vocabulary\n",
      "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
      "loading configuration file https://huggingface.co/hf-test/xls-r-dummy/resolve/main/config.json from cache at /workspace/.cache/huggingface/transformers/8157526a5096028eb61c63d228d882e5437edef5cb8b1a033ae35bf6249d1568.80b921aeb31bf1fa045a15aafa0e6f7e2ac68d338c1d83a3c76c99e260b22a62\n",
      "Model config Wav2Vec2Config {\n",
      "  \"_name_or_path\": \"hf-test/xls-r-dummy\",\n",
      "  \"activation_dropout\": 0.1,\n",
      "  \"adapter_kernel_size\": 3,\n",
      "  \"adapter_stride\": 2,\n",
      "  \"add_adapter\": false,\n",
      "  \"apply_spec_augment\": true,\n",
      "  \"architectures\": [\n",
      "    \"Wav2Vec2Model\"\n",
      "  ],\n",
      "  \"attention_dropout\": 0.1,\n",
      "  \"bos_token_id\": 1,\n",
      "  \"classifier_proj_size\": 256,\n",
      "  \"codevector_dim\": 256,\n",
      "  \"contrastive_logits_temperature\": 0.1,\n",
      "  \"conv_bias\": false,\n",
      "  \"conv_dim\": [\n",
      "    32,\n",
      "    32,\n",
      "    32\n",
      "  ],\n",
      "  \"conv_kernel\": [\n",
      "    8,\n",
      "    8,\n",
      "    8\n",
      "  ],\n",
      "  \"conv_stride\": [\n",
      "    4,\n",
      "    4,\n",
      "    4\n",
      "  ],\n",
      "  \"ctc_loss_reduction\": \"sum\",\n",
      "  \"ctc_zero_infinity\": false,\n",
      "  \"diversity_loss_weight\": 0.1,\n",
      "  \"do_stable_layer_norm\": true,\n",
      "  \"eos_token_id\": 2,\n",
      "  \"feat_extract_activation\": \"gelu\",\n",
      "  \"feat_extract_dropout\": 0.0,\n",
      "  \"feat_extract_norm\": \"layer\",\n",
      "  \"feat_proj_dropout\": 0.1,\n",
      "  \"feat_quantizer_dropout\": 0.0,\n",
      "  \"final_dropout\": 0.1,\n",
      "  \"gradient_checkpointing\": false,\n",
      "  \"hidden_act\": \"gelu\",\n",
      "  \"hidden_dropout\": 0.1,\n",
      "  \"hidden_dropout_prob\": 0.1,\n",
      "  \"hidden_size\": 16,\n",
      "  \"initializer_range\": 0.02,\n",
      "  \"intermediate_size\": 20,\n",
      "  \"layer_norm_eps\": 1e-05,\n",
      "  \"layerdrop\": 0.1,\n",
      "  \"mask_feature_length\": 10,\n",
      "  \"mask_feature_min_masks\": 0,\n",
      "  \"mask_feature_prob\": 0.0,\n",
      "  \"mask_time_length\": 10,\n",
      "  \"mask_time_min_masks\": 2,\n",
      "  \"mask_time_prob\": 0.05,\n",
      "  \"model_type\": \"wav2vec2\",\n",
      "  \"num_adapter_layers\": 3,\n",
      "  \"num_attention_heads\": 2,\n",
      "  \"num_codevector_groups\": 2,\n",
      "  \"num_codevectors_per_group\": 320,\n",
      "  \"num_conv_pos_embedding_groups\": 2,\n",
      "  \"num_conv_pos_embeddings\": 16,\n",
      "  \"num_feat_extract_layers\": 3,\n",
      "  \"num_hidden_layers\": 4,\n",
      "  \"num_negatives\": 10,\n",
      "  \"output_hidden_size\": 16,\n",
      "  \"pad_token_id\": 0,\n",
      "  \"proj_codevector_dim\": 256,\n",
      "  \"tdnn_dilation\": [\n",
      "    1,\n",
      "    2,\n",
      "    3,\n",
      "    1,\n",
      "    1\n",
      "  ],\n",
      "  \"tdnn_dim\": [\n",
      "    512,\n",
      "    512,\n",
      "    512,\n",
      "    512,\n",
      "    1500\n",
      "  ],\n",
      "  \"tdnn_kernel\": [\n",
      "    5,\n",
      "    3,\n",
      "    3,\n",
      "    1,\n",
      "    1\n",
      "  ],\n",
      "  \"torch_dtype\": \"float32\",\n",
      "  \"transformers_version\": \"4.17.0.dev0\",\n",
      "  \"use_weighted_layer_sum\": false,\n",
      "  \"vocab_size\": 32,\n",
      "  \"xvector_output_dim\": 512\n",
      "}\n",
      "\n",
      "loading feature extractor configuration file https://huggingface.co/hf-test/xls-r-dummy/resolve/main/preprocessor_config.json from cache at /workspace/.cache/huggingface/transformers/0ba9471c5a13055b5740bbac451b95c783dcaead5aacc5d0175959022489c3aa.bd1cf6fc7017d09efe9b164cbc7b32f9bbc3b3bcc243032c6f8e87573bde4292\n",
      "Feature extractor Wav2Vec2FeatureExtractor {\n",
      "  \"do_normalize\": true,\n",
      "  \"feature_extractor_type\": \"Wav2Vec2FeatureExtractor\",\n",
      "  \"feature_size\": 1,\n",
      "  \"padding_side\": \"right\",\n",
      "  \"padding_value\": 0.0,\n",
      "  \"return_attention_mask\": false,\n",
      "  \"sampling_rate\": 16000\n",
      "}\n",
      "\n",
      "loading weights file https://huggingface.co/hf-test/xls-r-dummy/resolve/main/pytorch_model.bin from cache at /workspace/.cache/huggingface/transformers/d374ffdefd19b7dca1d007484e8a16189d261a626cc06a3481bb034d23fe194a.4dc5ab5d8c52b8612a63c422a98f6d3de7e0bbf1469c52e89e028b4ec90e4b43\n",
      "All model checkpoint weights were used when initializing Wav2Vec2ForCTC.\n",
      "\n",
      "Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at hf-test/xls-r-dummy and are newly initialized: ['lm_head.bias', 'lm_head.weight']\n",
      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
      "preprocess datasets: 30002ex [03:40, 136.35ex/s]\n",
      "preprocess datasets: 9184ex [01:06, 137.56ex/s]\n",
      "100%|██████████████████████████████████████████| 31/31 [00:00<00:00, 809.78ba/s]\n",
      "100%|██████████████████████████████████████████| 10/10 [00:00<00:00, 874.45ba/s]\n",
      "Configuration saved in ./preprocessor_config.json\n",
      "tokenizer config file saved in ./tokenizer_config.json\n",
      "Special tokens file saved in ./special_tokens_map.json\n",
      "added tokens file saved in ./added_tokens.json\n",
      "Configuration saved in ./config.json\n",
      "loading feature extractor configuration file ./preprocessor_config.json\n",
      "loading configuration file ./config.json\n",
      "Model config Wav2Vec2Config {\n",
      "  \"_name_or_path\": \"./\",\n",
      "  \"activation_dropout\": 0.0,\n",
      "  \"adapter_kernel_size\": 3,\n",
      "  \"adapter_stride\": 2,\n",
      "  \"add_adapter\": false,\n",
      "  \"apply_spec_augment\": true,\n",
      "  \"architectures\": [\n",
      "    \"Wav2Vec2Model\"\n",
      "  ],\n",
      "  \"attention_dropout\": 0.0,\n",
      "  \"bos_token_id\": 1,\n",
      "  \"classifier_proj_size\": 256,\n",
      "  \"codevector_dim\": 256,\n",
      "  \"contrastive_logits_temperature\": 0.1,\n",
      "  \"conv_bias\": false,\n",
      "  \"conv_dim\": [\n",
      "    32,\n",
      "    32,\n",
      "    32\n",
      "  ],\n",
      "  \"conv_kernel\": [\n",
      "    8,\n",
      "    8,\n",
      "    8\n",
      "  ],\n",
      "  \"conv_stride\": [\n",
      "    4,\n",
      "    4,\n",
      "    4\n",
      "  ],\n",
      "  \"ctc_loss_reduction\": \"mean\",\n",
      "  \"ctc_zero_infinity\": false,\n",
      "  \"diversity_loss_weight\": 0.1,\n",
      "  \"do_stable_layer_norm\": true,\n",
      "  \"eos_token_id\": 2,\n",
      "  \"feat_extract_activation\": \"gelu\",\n",
      "  \"feat_extract_dropout\": 0.0,\n",
      "  \"feat_extract_norm\": \"layer\",\n",
      "  \"feat_proj_dropout\": 0.0,\n",
      "  \"feat_quantizer_dropout\": 0.0,\n",
      "  \"final_dropout\": 0.0,\n",
      "  \"hidden_act\": \"gelu\",\n",
      "  \"hidden_dropout\": 0.0,\n",
      "  \"hidden_dropout_prob\": 0.1,\n",
      "  \"hidden_size\": 16,\n",
      "  \"initializer_range\": 0.02,\n",
      "  \"intermediate_size\": 20,\n",
      "  \"layer_norm_eps\": 1e-05,\n",
      "  \"layerdrop\": 0.0,\n",
      "  \"mask_feature_length\": 10,\n",
      "  \"mask_feature_min_masks\": 0,\n",
      "  \"mask_feature_prob\": 0.0,\n",
      "  \"mask_time_length\": 10,\n",
      "  \"mask_time_min_masks\": 2,\n",
      "  \"mask_time_prob\": 0.05,\n",
      "  \"model_type\": \"wav2vec2\",\n",
      "  \"num_adapter_layers\": 3,\n",
      "  \"num_attention_heads\": 2,\n",
      "  \"num_codevector_groups\": 2,\n",
      "  \"num_codevectors_per_group\": 320,\n",
      "  \"num_conv_pos_embedding_groups\": 2,\n",
      "  \"num_conv_pos_embeddings\": 16,\n",
      "  \"num_feat_extract_layers\": 3,\n",
      "  \"num_hidden_layers\": 4,\n",
      "  \"num_negatives\": 10,\n",
      "  \"output_hidden_size\": 16,\n",
      "  \"pad_token_id\": 51,\n",
      "  \"proj_codevector_dim\": 256,\n",
      "  \"tdnn_dilation\": [\n",
      "    1,\n",
      "    2,\n",
      "    3,\n",
      "    1,\n",
      "    1\n",
      "  ],\n",
      "  \"tdnn_dim\": [\n",
      "    512,\n",
      "    512,\n",
      "    512,\n",
      "    512,\n",
      "    1500\n",
      "  ],\n",
      "  \"tdnn_kernel\": [\n",
      "    5,\n",
      "    3,\n",
      "    3,\n",
      "    1,\n",
      "    1\n",
      "  ],\n",
      "  \"torch_dtype\": \"float32\",\n",
      "  \"transformers_version\": \"4.17.0.dev0\",\n",
      "  \"use_weighted_layer_sum\": false,\n",
      "  \"vocab_size\": 54,\n",
      "  \"xvector_output_dim\": 512\n",
      "}\n",
      "\n",
      "loading feature extractor configuration file ./preprocessor_config.json\n",
      "Feature extractor Wav2Vec2FeatureExtractor {\n",
      "  \"do_normalize\": true,\n",
      "  \"feature_extractor_type\": \"Wav2Vec2FeatureExtractor\",\n",
      "  \"feature_size\": 1,\n",
      "  \"padding_side\": \"right\",\n",
      "  \"padding_value\": 0.0,\n",
      "  \"return_attention_mask\": false,\n",
      "  \"sampling_rate\": 16000\n",
      "}\n",
      "\n",
      "Didn't find file ./tokenizer.json. We won't load it.\n",
      "loading file ./vocab.json\n",
      "loading file ./tokenizer_config.json\n",
      "loading file ./added_tokens.json\n",
      "loading file ./special_tokens_map.json\n",
      "loading file None\n",
      "Adding <s> to the vocabulary\n",
      "Adding </s> to the vocabulary\n",
      "/workspace/xls-r-ab-test/./ is already a clone of https://huggingface.co/masapasa/xls-r-ab-test. Make sure you pull the latest changes with `repo.git_pull()`.\n",
      "01/31/2022 17:18:19 - WARNING - huggingface_hub.repository - /workspace/xls-r-ab-test/./ is already a clone of https://huggingface.co/masapasa/xls-r-ab-test. Make sure you pull the latest changes with `repo.git_pull()`.\n",
      "max_steps is given, it will override any value given in num_train_epochs\n",
      "Using amp half precision backend\n",
      "The following columns in the training set  don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
      "/opt/conda/lib/python3.8/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use thePyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
      "  warnings.warn(\n",
      "***** Running training *****\n",
      "  Num examples = 30002\n",
      "  Num Epochs = 1\n",
      "  Instantaneous batch size per device = 2\n",
      "  Total train batch size (w. parallel, distributed & accumulation) = 2\n",
      "  Gradient Accumulation steps = 1\n",
      "  Total optimization steps = 10\n",
      " 50%|██████████████████████                      | 5/10 [00:00<00:00, 10.35it/s]Saving model checkpoint to ./checkpoint-5\n",
      "Configuration saved in ./checkpoint-5/config.json\n",
      "Model weights saved in ./checkpoint-5/pytorch_model.bin\n",
      "Configuration saved in ./checkpoint-5/preprocessor_config.json\n",
      "Configuration saved in ./preprocessor_config.json\n",
      " 90%|███████████████████████████████████████▌    | 9/10 [00:03<00:00,  2.20it/s]Saving model checkpoint to ./checkpoint-10\n",
      "Configuration saved in ./checkpoint-10/config.json\n",
      "Model weights saved in ./checkpoint-10/pytorch_model.bin\n",
      "Configuration saved in ./checkpoint-10/preprocessor_config.json\n",
      "Deleting older checkpoint [checkpoint-5] due to args.save_total_limit\n",
      "\n",
      "\n",
      "Training completed. Do not forget to share your model on huggingface.co/models =)\n",
      "\n",
      "\n",
      "{'train_runtime': 3.7102, 'train_samples_per_second': 5.391, 'train_steps_per_second': 2.695, 'train_loss': 67.9575927734375, 'epoch': 0.0}\n",
      "100%|███████████████████████████████████████████| 10/10 [00:03<00:00,  2.70it/s]\n",
      "Saving model checkpoint to ./\n",
      "Configuration saved in ./config.json\n",
      "Model weights saved in ./pytorch_model.bin\n",
      "Configuration saved in ./preprocessor_config.json\n",
      "Saving model checkpoint to ./\n",
      "Configuration saved in ./config.json\n",
      "Model weights saved in ./pytorch_model.bin\n",
      "Configuration saved in ./preprocessor_config.json\n",
      "Several commits (2) will be pushed upstream.\n",
      "01/31/2022 17:18:26 - WARNING - huggingface_hub.repository - Several commits (2) will be pushed upstream.\n",
      "The progress bars may be unreliable.\n",
      "01/31/2022 17:18:26 - WARNING - huggingface_hub.repository - The progress bars may be unreliable.\n",
      "Everything up-to-date\n",
      "\n",
      "01/31/2022 17:18:27 - WARNING - huggingface_hub.repository - Everything up-to-date\n",
      "\n",
      "Dropping the following result as it does not have all the necessary fields:\n",
      "{'dataset': {'name': 'common_voice', 'type': 'common_voice', 'args': 'ab'}}\n",
      "To https://huggingface.co/masapasa/xls-r-ab-test\n",
      "   b50c32e..4e53539  main -> main\n",
      "\n",
      "01/31/2022 17:18:33 - WARNING - huggingface_hub.repository - To https://huggingface.co/masapasa/xls-r-ab-test\n",
      "   b50c32e..4e53539  main -> main\n",
      "\n",
      "***** train metrics *****\n",
      "  epoch                    =        0.0\n",
      "  train_loss               =    67.9576\n",
      "  train_runtime            = 0:00:03.71\n",
      "  train_samples            =      30002\n",
      "  train_samples_per_second =      5.391\n",
      "  train_steps_per_second   =      2.695\n",
      "01/31/2022 17:18:36 - INFO - __main__ - *** Evaluate ***\n",
      "The following columns in the evaluation set  don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n",
      "***** Running Evaluation *****\n",
      "  Num examples = 9184\n",
      "  Batch size = 8\n",
      " 68%|███████████████████████████             | 777/1148 [02:07<01:06,  5.55it/s]"
     ]
    }
   ],
   "source": [
    "# Smoke test: 10 training steps on the dummy XLS-R checkpoint (Common Voice 8.0, config \"ab\"),\n",
    "# followed by evaluation. Checkpoints are written to ./ and the result is pushed to the Hub.\n",
    "!python run_speech_recognition_ctc.py \\\n",
    "\t--dataset_name=\"mozilla-foundation/common_voice_8_0\" \\\n",
    "\t--model_name_or_path=\"hf-test/xls-r-dummy\" \\\n",
    "\t--dataset_config_name=\"ab\" \\\n",
    "\t--output_dir=\"./\" \\\n",
    "\t--overwrite_output_dir \\\n",
    "\t--max_steps=\"10\" \\\n",
    "\t--per_device_train_batch_size=\"2\" \\\n",
    "\t--learning_rate=\"3e-4\" \\\n",
    "\t--save_total_limit=\"1\" \\\n",
    "\t--evaluation_strategy=\"steps\" \\\n",
    "\t--text_column_name=\"sentence\" \\\n",
    "\t--length_column_name=\"input_length\" \\\n",
    "\t--save_steps=\"5\" \\\n",
    "\t--layerdrop=\"0.0\" \\\n",
    "\t--freeze_feature_encoder \\\n",
    "\t--gradient_checkpointing \\\n",
    "\t--fp16 \\\n",
    "\t--group_by_length \\\n",
    "\t--push_to_hub \\\n",
    "\t--use_auth_token \\\n",
    "\t--do_train --do_eval"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "authorship_tag": "ABX9TyM3OaMlm9YQtKpl28c8gBBd",
   "include_colab_link": true,
   "name": "DebugOVHTransformers.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}