{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# HuggingFace challenge - Debugger notebook\n", "Run this notebook to verify your libraries versions, check GPU config and run a quick training" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "id": "T2utsYSKszvv" }, "outputs": [], "source": [ "import platform\n", "import multiprocessing\n", "\n", "import torch\n", "import transformers\n", "import datasets\n", "\n", "import soundfile" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Print main infos" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "5P6I-W9ts-kR", "outputId": "939bd550-1486-46a6-8371-e82ada0f448c" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Platform: Linux-5.11.0-37-generic-x86_64-with-glibc2.10\n", "CPU cores: 60\n", "Python version: 3.8.8\n", "PyTorch version: 1.10.1+cu102\n", "GPU is visible: True\n", "Transformers version: 4.16.0.dev0\n", "Datasets version: 1.17.1.dev0\n", "soundfile version: 0.10.3\n" ] } ], "source": [ "print(f\"Platform: {platform.platform()}\")\n", "print(f\"CPU cores: {multiprocessing.cpu_count()}\")\n", "\n", "print(f\"Python version: {platform.python_version()}\")\n", "\n", "print(f\"PyTorch version: {torch.__version__}\")\n", "print(f\"GPU is visible: {torch.cuda.is_available()}\")\n", "\n", "print(f\"Transformers version: {transformers.__version__}\")\n", "print(f\"Datasets version: {datasets.__version__}\")\n", "\n", "print(f\"soundfile version: {soundfile.__version__}\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Check your GPU informations (if any)\n", "If you launched an AI Training job with GPU resources, they should be listed below (Tesla V100s 32GB).\n", "Driver and CUDA version " ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "YT7fRnKctggU", "outputId": "f355a3e0-20da-489f-bd1f-5e508e792a68" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Wed Jan 12 10:34:59 2022 \n", "+-----------------------------------------------------------------------------+\n", "| NVIDIA-SMI 470.57.02 Driver Version: 470.57.02 CUDA Version: 11.4 |\n", "|-------------------------------+----------------------+----------------------+\n", "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n", "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n", "| | | MIG M. |\n", "|===============================+======================+======================|\n", "| 0 Tesla V100S-PCI... 
{ "cell_type": "markdown", "metadata": { "id": "TorMtpwPv6RQ" }, "source": [ "## Quick training run with a dummy model and data\n", "More information at https://github.com/huggingface/transformers/tree/master/examples/pytorch/speech-recognition" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "fevoJD15u4Ss", "outputId": "5861d34e-745b-45ee-e780-ed363043e655" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "--2022-01-31 16:15:26-- https://raw.githubusercontent.com/huggingface/transformers/master/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py\n", "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 30360 (30K) [text/plain]\n", "Saving to: ‘run_speech_recognition_ctc.py’\n", "\n", "run_speech_recognit 100%[===================>] 29.65K --.-KB/s in 0.001s \n", "\n", "2022-01-31 16:15:27 (57.7 MB/s) - ‘run_speech_recognition_ctc.py’ saved [30360/30360]\n", "\n" ] } ], "source": [ "!wget -O run_speech_recognition_ctc.py https://raw.githubusercontent.com/huggingface/transformers/master/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Mz4bubhxxsad", "outputId": "23398525-cc19-43c2-9fec-497e06214f29" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "01/31/2022 16:15:35 - WARNING - __main__ - Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: True\n", "01/31/2022 16:15:35 - INFO - __main__ - Training/evaluation parameters TrainingArguments(\n", "_n_gpu=1,\n", "adafactor=False,\n", "adam_beta1=0.9,\n", "adam_beta2=0.999,\n", "adam_epsilon=1e-08,\n", "bf16=False,\n", "bf16_full_eval=False,\n", "dataloader_drop_last=False,\n", "dataloader_num_workers=0,\n", "dataloader_pin_memory=True,\n", "ddp_bucket_cap_mb=None,\n", "ddp_find_unused_parameters=None,\n", "debug=[],\n", "deepspeed=None,\n", "disable_tqdm=False,\n", "do_eval=True,\n", "do_predict=False,\n", "do_train=True,\n", "eval_accumulation_steps=None,\n", "eval_steps=500,\n", "evaluation_strategy=IntervalStrategy.STEPS,\n", "fp16=True,\n", "fp16_backend=auto,\n", "fp16_full_eval=False,\n", "fp16_opt_level=O1,\n", "gradient_accumulation_steps=1,\n", "gradient_checkpointing=True,\n", "greater_is_better=None,\n", "group_by_length=True,\n", "half_precision_backend=auto,\n", "hub_model_id=None,\n", "hub_strategy=HubStrategy.EVERY_SAVE,\n", "hub_token=<HUB_TOKEN>,\n", "ignore_data_skip=False,\n", "label_names=None,\n", "label_smoothing_factor=0.0,\n", "learning_rate=0.0003,\n",
"length_column_name=input_length,\n", "load_best_model_at_end=False,\n", "local_rank=-1,\n", "log_level=-1,\n", "log_level_replica=-1,\n", "log_on_each_node=True,\n", "logging_dir=./runs/Jan31_16-15-35_job-6a6be32c-c82d-4385-805b-1f7606124d5b,\n", "logging_first_step=False,\n", "logging_nan_inf_filter=True,\n", "logging_steps=500,\n", "logging_strategy=IntervalStrategy.STEPS,\n", "lr_scheduler_type=SchedulerType.LINEAR,\n", "max_grad_norm=1.0,\n", "max_steps=10,\n", "metric_for_best_model=None,\n", "mp_parameters=,\n", "no_cuda=False,\n", "num_train_epochs=3.0,\n", "optim=OptimizerNames.ADAMW_HF,\n", "output_dir=./,\n", "overwrite_output_dir=True,\n", "past_index=-1,\n", "per_device_eval_batch_size=8,\n", "per_device_train_batch_size=2,\n", "prediction_loss_only=False,\n", "push_to_hub=False,\n", "push_to_hub_model_id=None,\n", "push_to_hub_organization=None,\n", "push_to_hub_token=,\n", "remove_unused_columns=True,\n", "report_to=[],\n", "resume_from_checkpoint=None,\n", "run_name=./,\n", "save_on_each_node=False,\n", "save_steps=5,\n", "save_strategy=IntervalStrategy.STEPS,\n", "save_total_limit=1,\n", "seed=42,\n", "sharded_ddp=[],\n", "skip_memory_metrics=True,\n", "tf32=None,\n", "tpu_metrics_debug=False,\n", "tpu_num_cores=None,\n", "use_legacy_prediction_loop=False,\n", "warmup_ratio=0.0,\n", "warmup_steps=0,\n", "weight_decay=0.0,\n", "xpu_backend=None,\n", ")\n", "Downloading: 23.3kB [00:00, 23.5MB/s] \n", "Downloading: 168kB [00:00, 105MB/s] \n", "Downloading and preparing dataset common_voice/ab (download: 39.14 MiB, generated: 40.14 MiB, post-processed: Unknown size, total: 79.28 MiB) to /workspace/.cache/huggingface/datasets/common_voice/ab/6.1.0/5693bfc0feeade582a78c2fb250bc88f52bd86f0a7f1bb22bfee67e715de30fd...\n", "Downloading: 100%|█████████████████████████| 41.0M/41.0M [00:03<00:00, 11.4MB/s]\n", "Dataset common_voice downloaded and prepared to /workspace/.cache/huggingface/datasets/common_voice/ab/6.1.0/5693bfc0feeade582a78c2fb250bc88f52bd86f0a7f1bb22bfee67e715de30fd. 
Subsequent calls will reuse this data.\n", "01/31/2022 16:15:43 - WARNING - datasets.builder - Reusing dataset common_voice (/workspace/.cache/huggingface/datasets/common_voice/ab/6.1.0/5693bfc0feeade582a78c2fb250bc88f52bd86f0a7f1bb22bfee67e715de30fd)\n", "remove special characters from datasets: 22ex [00:00, 6184.63ex/s]\n", "remove special characters from datasets: 9ex [00:00, 5925.09ex/s]\n", "https://huggingface.co/hf-test/xls-r-dummy/resolve/main/config.json not found in cache or force_download set to True, downloading to /workspace/.cache/huggingface/transformers/tmpj1jsvojt\n", "Downloading: 100%|█████████████████████████| 1.95k/1.95k [00:00<00:00, 2.32MB/s]\n", "storing https://huggingface.co/hf-test/xls-r-dummy/resolve/main/config.json in cache at /workspace/.cache/huggingface/transformers/8157526a5096028eb61c63d228d882e5437edef5cb8b1a033ae35bf6249d1568.80b921aeb31bf1fa045a15aafa0e6f7e2ac68d338c1d83a3c76c99e260b22a62\n", "creating metadata file for /workspace/.cache/huggingface/transformers/8157526a5096028eb61c63d228d882e5437edef5cb8b1a033ae35bf6249d1568.80b921aeb31bf1fa045a15aafa0e6f7e2ac68d338c1d83a3c76c99e260b22a62\n", "loading configuration file https://huggingface.co/hf-test/xls-r-dummy/resolve/main/config.json from cache at /workspace/.cache/huggingface/transformers/8157526a5096028eb61c63d228d882e5437edef5cb8b1a033ae35bf6249d1568.80b921aeb31bf1fa045a15aafa0e6f7e2ac68d338c1d83a3c76c99e260b22a62\n", "Model config Wav2Vec2Config {\n", " \"_name_or_path\": \"hf-test/xls-r-dummy\",\n", " \"activation_dropout\": 0.1,\n", " \"adapter_kernel_size\": 3,\n", " \"adapter_stride\": 2,\n", " \"add_adapter\": false,\n", " \"apply_spec_augment\": true,\n", " \"architectures\": [\n", " \"Wav2Vec2Model\"\n", " ],\n", " \"attention_dropout\": 0.1,\n", " \"bos_token_id\": 1,\n", " \"classifier_proj_size\": 256,\n", " \"codevector_dim\": 256,\n", " \"contrastive_logits_temperature\": 0.1,\n", " \"conv_bias\": false,\n", " \"conv_dim\": [\n", " 32,\n", " 32,\n", " 32\n", " ],\n", " \"conv_kernel\": [\n", " 8,\n", " 8,\n", " 8\n", " ],\n", " \"conv_stride\": [\n", " 4,\n", " 4,\n", " 4\n", " ],\n", " \"ctc_loss_reduction\": \"sum\",\n", " \"ctc_zero_infinity\": false,\n", " \"diversity_loss_weight\": 0.1,\n", " \"do_stable_layer_norm\": true,\n", " \"eos_token_id\": 2,\n", " \"feat_extract_activation\": \"gelu\",\n", " \"feat_extract_dropout\": 0.0,\n", " \"feat_extract_norm\": \"layer\",\n", " \"feat_proj_dropout\": 0.1,\n", " \"feat_quantizer_dropout\": 0.0,\n", " \"final_dropout\": 0.1,\n", " \"gradient_checkpointing\": false,\n", " \"hidden_act\": \"gelu\",\n", " \"hidden_dropout\": 0.1,\n", " \"hidden_dropout_prob\": 0.1,\n", " \"hidden_size\": 16,\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": 20,\n", " \"layer_norm_eps\": 1e-05,\n", " \"layerdrop\": 0.1,\n", " \"mask_feature_length\": 10,\n", " \"mask_feature_min_masks\": 0,\n", " \"mask_feature_prob\": 0.0,\n", " \"mask_time_length\": 10,\n", " \"mask_time_min_masks\": 2,\n", " \"mask_time_prob\": 0.05,\n", " \"model_type\": \"wav2vec2\",\n", " \"num_adapter_layers\": 3,\n", " \"num_attention_heads\": 2,\n", " \"num_codevector_groups\": 2,\n", " \"num_codevectors_per_group\": 320,\n", " \"num_conv_pos_embedding_groups\": 2,\n", " \"num_conv_pos_embeddings\": 16,\n", " \"num_feat_extract_layers\": 3,\n", " \"num_hidden_layers\": 4,\n", " \"num_negatives\": 10,\n", " \"output_hidden_size\": 16,\n", " \"pad_token_id\": 0,\n", " \"proj_codevector_dim\": 256,\n", " \"tdnn_dilation\": [\n", " 1,\n", " 2,\n", " 3,\n", " 1,\n", " 
1\n", " ],\n", " \"tdnn_dim\": [\n", " 512,\n", " 512,\n", " 512,\n", " 512,\n", " 1500\n", " ],\n", " \"tdnn_kernel\": [\n", " 5,\n", " 3,\n", " 3,\n", " 1,\n", " 1\n", " ],\n", " \"torch_dtype\": \"float32\",\n", " \"transformers_version\": \"4.17.0.dev0\",\n", " \"use_weighted_layer_sum\": false,\n", " \"vocab_size\": 32,\n", " \"xvector_output_dim\": 512\n", "}\n", "\n", "100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 478.47ba/s]\n", "100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 811.12ba/s]\n", "Didn't find file ./tokenizer_config.json. We won't load it.\n", "Didn't find file ./added_tokens.json. We won't load it.\n", "Didn't find file ./special_tokens_map.json. We won't load it.\n", "Didn't find file ./tokenizer.json. We won't load it.\n", "loading file ./vocab.json\n", "loading file None\n", "loading file None\n", "loading file None\n", "loading file None\n", "file ./config.json not found\n", "Adding to the vocabulary\n", "Adding to the vocabulary\n", "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", "loading configuration file https://huggingface.co/hf-test/xls-r-dummy/resolve/main/config.json from cache at /workspace/.cache/huggingface/transformers/8157526a5096028eb61c63d228d882e5437edef5cb8b1a033ae35bf6249d1568.80b921aeb31bf1fa045a15aafa0e6f7e2ac68d338c1d83a3c76c99e260b22a62\n", "Model config Wav2Vec2Config {\n", " \"_name_or_path\": \"hf-test/xls-r-dummy\",\n", " \"activation_dropout\": 0.1,\n", " \"adapter_kernel_size\": 3,\n", " \"adapter_stride\": 2,\n", " \"add_adapter\": false,\n", " \"apply_spec_augment\": true,\n", " \"architectures\": [\n", " \"Wav2Vec2Model\"\n", " ],\n", " \"attention_dropout\": 0.1,\n", " \"bos_token_id\": 1,\n", " \"classifier_proj_size\": 256,\n", " \"codevector_dim\": 256,\n", " \"contrastive_logits_temperature\": 0.1,\n", " \"conv_bias\": false,\n", " \"conv_dim\": [\n", " 32,\n", " 32,\n", " 32\n", " ],\n", " \"conv_kernel\": [\n", " 8,\n", " 8,\n", " 8\n", " ],\n", " \"conv_stride\": [\n", " 4,\n", " 4,\n", " 4\n", " ],\n", " \"ctc_loss_reduction\": \"sum\",\n", " \"ctc_zero_infinity\": false,\n", " \"diversity_loss_weight\": 0.1,\n", " \"do_stable_layer_norm\": true,\n", " \"eos_token_id\": 2,\n", " \"feat_extract_activation\": \"gelu\",\n", " \"feat_extract_dropout\": 0.0,\n", " \"feat_extract_norm\": \"layer\",\n", " \"feat_proj_dropout\": 0.1,\n", " \"feat_quantizer_dropout\": 0.0,\n", " \"final_dropout\": 0.1,\n", " \"gradient_checkpointing\": false,\n", " \"hidden_act\": \"gelu\",\n", " \"hidden_dropout\": 0.1,\n", " \"hidden_dropout_prob\": 0.1,\n", " \"hidden_size\": 16,\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": 20,\n", " \"layer_norm_eps\": 1e-05,\n", " \"layerdrop\": 0.1,\n", " \"mask_feature_length\": 10,\n", " \"mask_feature_min_masks\": 0,\n", " \"mask_feature_prob\": 0.0,\n", " \"mask_time_length\": 10,\n", " \"mask_time_min_masks\": 2,\n", " \"mask_time_prob\": 0.05,\n", " \"model_type\": \"wav2vec2\",\n", " \"num_adapter_layers\": 3,\n", " \"num_attention_heads\": 2,\n", " \"num_codevector_groups\": 2,\n", " \"num_codevectors_per_group\": 320,\n", " \"num_conv_pos_embedding_groups\": 2,\n", " \"num_conv_pos_embeddings\": 16,\n", " \"num_feat_extract_layers\": 3,\n", " \"num_hidden_layers\": 4,\n", " \"num_negatives\": 10,\n", " \"output_hidden_size\": 16,\n", " \"pad_token_id\": 0,\n", " \"proj_codevector_dim\": 256,\n", " \"tdnn_dilation\": [\n", " 1,\n", " 2,\n", " 3,\n", " 1,\n", " 1\n", " 
],\n", " \"tdnn_dim\": [\n", " 512,\n", " 512,\n", " 512,\n", " 512,\n", " 1500\n", " ],\n", " \"tdnn_kernel\": [\n", " 5,\n", " 3,\n", " 3,\n", " 1,\n", " 1\n", " ],\n", " \"torch_dtype\": \"float32\",\n", " \"transformers_version\": \"4.17.0.dev0\",\n", " \"use_weighted_layer_sum\": false,\n", " \"vocab_size\": 32,\n", " \"xvector_output_dim\": 512\n", "}\n", "\n", "https://huggingface.co/hf-test/xls-r-dummy/resolve/main/preprocessor_config.json not found in cache or force_download set to True, downloading to /workspace/.cache/huggingface/transformers/tmp65bus7d7\n", "Downloading: 100%|██████████████████████████████| 243/243 [00:00<00:00, 295kB/s]\n", "storing https://huggingface.co/hf-test/xls-r-dummy/resolve/main/preprocessor_config.json in cache at /workspace/.cache/huggingface/transformers/0ba9471c5a13055b5740bbac451b95c783dcaead5aacc5d0175959022489c3aa.bd1cf6fc7017d09efe9b164cbc7b32f9bbc3b3bcc243032c6f8e87573bde4292\n", "creating metadata file for /workspace/.cache/huggingface/transformers/0ba9471c5a13055b5740bbac451b95c783dcaead5aacc5d0175959022489c3aa.bd1cf6fc7017d09efe9b164cbc7b32f9bbc3b3bcc243032c6f8e87573bde4292\n", "loading feature extractor configuration file https://huggingface.co/hf-test/xls-r-dummy/resolve/main/preprocessor_config.json from cache at /workspace/.cache/huggingface/transformers/0ba9471c5a13055b5740bbac451b95c783dcaead5aacc5d0175959022489c3aa.bd1cf6fc7017d09efe9b164cbc7b32f9bbc3b3bcc243032c6f8e87573bde4292\n", "Feature extractor Wav2Vec2FeatureExtractor {\n", " \"do_normalize\": true,\n", " \"feature_extractor_type\": \"Wav2Vec2FeatureExtractor\",\n", " \"feature_size\": 1,\n", " \"padding_side\": \"right\",\n", " \"padding_value\": 0.0,\n", " \"return_attention_mask\": false,\n", " \"sampling_rate\": 16000\n", "}\n", "\n", "https://huggingface.co/hf-test/xls-r-dummy/resolve/main/pytorch_model.bin not found in cache or force_download set to True, downloading to /workspace/.cache/huggingface/transformers/tmpknzbltu6\n", "Downloading: 100%|████████████████████████████| 134k/134k [00:00<00:00, 512kB/s]\n", "storing https://huggingface.co/hf-test/xls-r-dummy/resolve/main/pytorch_model.bin in cache at /workspace/.cache/huggingface/transformers/d374ffdefd19b7dca1d007484e8a16189d261a626cc06a3481bb034d23fe194a.4dc5ab5d8c52b8612a63c422a98f6d3de7e0bbf1469c52e89e028b4ec90e4b43\n", "creating metadata file for /workspace/.cache/huggingface/transformers/d374ffdefd19b7dca1d007484e8a16189d261a626cc06a3481bb034d23fe194a.4dc5ab5d8c52b8612a63c422a98f6d3de7e0bbf1469c52e89e028b4ec90e4b43\n", "loading weights file https://huggingface.co/hf-test/xls-r-dummy/resolve/main/pytorch_model.bin from cache at /workspace/.cache/huggingface/transformers/d374ffdefd19b7dca1d007484e8a16189d261a626cc06a3481bb034d23fe194a.4dc5ab5d8c52b8612a63c422a98f6d3de7e0bbf1469c52e89e028b4ec90e4b43\n", "All model checkpoint weights were used when initializing Wav2Vec2ForCTC.\n", "\n", "Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at hf-test/xls-r-dummy and are newly initialized: ['lm_head.bias', 'lm_head.weight']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", "preprocess datasets: 22ex [00:00, 70.03ex/s]\n", "preprocess datasets: 9ex [00:00, 100.33ex/s]\n", "100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 837.02ba/s]\n", "100%|███████████████████████████████████████████| 1/1 [00:00<00:00, 1258.42ba/s]\n", "Downloading: 4.48kB [00:00, 5.81MB/s] \n", "Configuration saved in 
./preprocessor_config.json\n", "tokenizer config file saved in ./tokenizer_config.json\n", "Special tokens file saved in ./special_tokens_map.json\n", "added tokens file saved in ./added_tokens.json\n", "Configuration saved in ./config.json\n", "loading feature extractor configuration file ./preprocessor_config.json\n", "loading configuration file ./config.json\n", "Model config Wav2Vec2Config {\n", " \"_name_or_path\": \"./\",\n", " \"activation_dropout\": 0.0,\n", " \"adapter_kernel_size\": 3,\n", " \"adapter_stride\": 2,\n", " \"add_adapter\": false,\n", " \"apply_spec_augment\": true,\n", " \"architectures\": [\n", " \"Wav2Vec2Model\"\n", " ],\n", " \"attention_dropout\": 0.0,\n", " \"bos_token_id\": 1,\n", " \"classifier_proj_size\": 256,\n", " \"codevector_dim\": 256,\n", " \"contrastive_logits_temperature\": 0.1,\n", " \"conv_bias\": false,\n", " \"conv_dim\": [\n", " 32,\n", " 32,\n", " 32\n", " ],\n", " \"conv_kernel\": [\n", " 8,\n", " 8,\n", " 8\n", " ],\n", " \"conv_stride\": [\n", " 4,\n", " 4,\n", " 4\n", " ],\n", " \"ctc_loss_reduction\": \"mean\",\n", " \"ctc_zero_infinity\": false,\n", " \"diversity_loss_weight\": 0.1,\n", " \"do_stable_layer_norm\": true,\n", " \"eos_token_id\": 2,\n", " \"feat_extract_activation\": \"gelu\",\n", " \"feat_extract_dropout\": 0.0,\n", " \"feat_extract_norm\": \"layer\",\n", " \"feat_proj_dropout\": 0.0,\n", " \"feat_quantizer_dropout\": 0.0,\n", " \"final_dropout\": 0.0,\n", " \"hidden_act\": \"gelu\",\n", " \"hidden_dropout\": 0.0,\n", " \"hidden_dropout_prob\": 0.1,\n", " \"hidden_size\": 16,\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": 20,\n", " \"layer_norm_eps\": 1e-05,\n", " \"layerdrop\": 0.0,\n", " \"mask_feature_length\": 10,\n", " \"mask_feature_min_masks\": 0,\n", " \"mask_feature_prob\": 0.0,\n", " \"mask_time_length\": 10,\n", " \"mask_time_min_masks\": 2,\n", " \"mask_time_prob\": 0.05,\n", " \"model_type\": \"wav2vec2\",\n", " \"num_adapter_layers\": 3,\n", " \"num_attention_heads\": 2,\n", " \"num_codevector_groups\": 2,\n", " \"num_codevectors_per_group\": 320,\n", " \"num_conv_pos_embedding_groups\": 2,\n", " \"num_conv_pos_embeddings\": 16,\n", " \"num_feat_extract_layers\": 3,\n", " \"num_hidden_layers\": 4,\n", " \"num_negatives\": 10,\n", " \"output_hidden_size\": 16,\n", " \"pad_token_id\": 45,\n", " \"proj_codevector_dim\": 256,\n", " \"tdnn_dilation\": [\n", " 1,\n", " 2,\n", " 3,\n", " 1,\n", " 1\n", " ],\n", " \"tdnn_dim\": [\n", " 512,\n", " 512,\n", " 512,\n", " 512,\n", " 1500\n", " ],\n", " \"tdnn_kernel\": [\n", " 5,\n", " 3,\n", " 3,\n", " 1,\n", " 1\n", " ],\n", " \"torch_dtype\": \"float32\",\n", " \"transformers_version\": \"4.17.0.dev0\",\n", " \"use_weighted_layer_sum\": false,\n", " \"vocab_size\": 48,\n", " \"xvector_output_dim\": 512\n", "}\n", "\n", "loading feature extractor configuration file ./preprocessor_config.json\n", "Feature extractor Wav2Vec2FeatureExtractor {\n", " \"do_normalize\": true,\n", " \"feature_extractor_type\": \"Wav2Vec2FeatureExtractor\",\n", " \"feature_size\": 1,\n", " \"padding_side\": \"right\",\n", " \"padding_value\": 0.0,\n", " \"return_attention_mask\": false,\n", " \"sampling_rate\": 16000\n", "}\n", "\n", "Didn't find file ./tokenizer.json. 
We won't load it.\n", "loading file ./vocab.json\n", "loading file ./tokenizer_config.json\n", "loading file ./added_tokens.json\n", "loading file ./special_tokens_map.json\n", "loading file None\n", "Adding <s> to the vocabulary\n", "Adding </s> to the vocabulary\n", "max_steps is given, it will override any value given in num_train_epochs\n", "Using amp half precision backend\n", "The following columns in the training set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", "/opt/conda/lib/python3.8/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use thePyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n", " warnings.warn(\n", "***** Running training *****\n", " Num examples = 22\n", " Num Epochs = 1\n", " Instantaneous batch size per device = 2\n", " Total train batch size (w. parallel, distributed & accumulation) = 2\n", " Gradient Accumulation steps = 1\n", " Total optimization steps = 10\n", " 50%|██████████████████████ | 5/10 [00:00<00:00, 8.80it/s]Saving model checkpoint to ./checkpoint-5\n", "Configuration saved in ./checkpoint-5/config.json\n", "Model weights saved in ./checkpoint-5/pytorch_model.bin\n", "Configuration saved in ./checkpoint-5/preprocessor_config.json\n", " 90%|███████████████████████████████████████▌ | 9/10 [00:00<00:00, 9.41it/s]Saving model checkpoint to ./checkpoint-10\n", "Configuration saved in ./checkpoint-10/config.json\n", "Model weights saved in ./checkpoint-10/pytorch_model.bin\n", "Configuration saved in ./checkpoint-10/preprocessor_config.json\n", "Deleting older checkpoint [checkpoint-5] due to args.save_total_limit\n", "\n", "\n", "Training completed. 
Do not forget to share your model on huggingface.co/models =)\n", "\n", "\n", "{'train_runtime': 1.0733, 'train_samples_per_second': 18.635, 'train_steps_per_second': 9.317, 'train_loss': 132.1487060546875, 'epoch': 0.91}\n", "100%|███████████████████████████████████████████| 10/10 [00:01<00:00, 9.32it/s]\n", "Saving model checkpoint to ./\n", "Configuration saved in ./config.json\n", "Model weights saved in ./pytorch_model.bin\n", "Configuration saved in ./preprocessor_config.json\n", "***** train metrics *****\n", " epoch = 0.91\n", " train_loss = 132.1487\n", " train_runtime = 0:00:01.07\n", " train_samples = 22\n", " train_samples_per_second = 18.635\n", " train_steps_per_second = 9.317\n", "01/31/2022 16:15:52 - INFO - __main__ - *** Evaluate ***\n", "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", "***** Running Evaluation *****\n", " Num examples = 9\n", " Batch size = 8\n", "100%|█████████████████████████████████████████████| 2/2 [00:00<00:00, 35.19it/s]\n", "***** eval metrics *****\n", " epoch = 0.91\n", " eval_loss = 128.2049\n", " eval_runtime = 0:00:00.32\n", " eval_samples = 9\n", " eval_samples_per_second = 27.51\n", " eval_steps_per_second = 6.113\n", " eval_wer = 14.1429\n", "Dropping the following result as it does not have all the necessary fields:\n", "{'dataset': {'name': 'COMMON_VOICE - AB', 'type': 'common_voice', 'args': 'Config: ab, Training split: train+validation, Eval split: test'}}\n" ] } ], "source": [ "!python run_speech_recognition_ctc.py \\\n", "\t--dataset_name=\"common_voice\" \\\n", "\t--model_name_or_path=\"hf-test/xls-r-dummy\" \\\n", "\t--dataset_config_name=\"ab\" \\\n", "\t--output_dir=\"./\" \\\n", "\t--overwrite_output_dir \\\n", "\t--max_steps=\"10\" \\\n", "\t--per_device_train_batch_size=\"2\" \\\n", "\t--learning_rate=\"3e-4\" \\\n", "\t--save_total_limit=\"1\" \\\n", "\t--evaluation_strategy=\"steps\" \\\n", "\t--text_column_name=\"sentence\" \\\n", "\t--length_column_name=\"input_length\" \\\n", "\t--save_steps=\"5\" \\\n", "\t--layerdrop=\"0.0\" \\\n", "\t--freeze_feature_encoder \\\n", "\t--gradient_checkpointing \\\n", "\t--fp16 \\\n", "\t--group_by_length \\\n", "\t--do_train --do_eval" ] },
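{ "cell_type": "markdown", "metadata": {}, "source": [ "As a final sanity check, the cell below reloads the model and processor that the quick run saved in `./` and runs a forward pass on one second of silence. This is a minimal sketch (not part of the original challenge scripts), assuming the training run above saved its artifacts to `./` as its logs indicate; it should print a logits shape without raising." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Sanity check: reload the artifacts saved by the training run above.\n", "# Assumes the run saved its model and processor to \"./\" (see the logs above).\n", "from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor\n", "\n", "model = Wav2Vec2ForCTC.from_pretrained(\"./\")\n", "processor = Wav2Vec2Processor.from_pretrained(\"./\")\n", "\n", "# One second of silence at the 16 kHz sampling rate the model expects.\n", "inputs = processor([0.0] * 16000, sampling_rate=16000, return_tensors=\"pt\")\n", "with torch.no_grad():\n", "    logits = model(inputs.input_values).logits\n", "print(f\"Logits shape: {tuple(logits.shape)}\")" ] } ], "metadata": { "accelerator": "GPU", "colab": { "authorship_tag": "ABX9TyM3OaMlm9YQtKpl28c8gBBd", "include_colab_link": true, "name": "DebugOVHTransformers.ipynb", "provenance": [] }, "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.8" } }, "nbformat": 4, "nbformat_minor": 4 }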