diff --git "a/debugger_ovh_transformers.ipynb" "b/debugger_ovh_transformers.ipynb" new file mode 100644--- /dev/null +++ "b/debugger_ovh_transformers.ipynb" @@ -0,0 +1,1223 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# HuggingFace challenge - Debugger notebook\n", + "Run this notebook to verify your libraries versions, check GPU config and run a quick training" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "T2utsYSKszvv" + }, + "outputs": [], + "source": [ + "import platform\n", + "import multiprocessing\n", + "\n", + "import torch\n", + "import transformers\n", + "import datasets\n", + "\n", + "import soundfile" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Print main infos" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Check your GPU informations (if any)\n", + "If you launched an AI Training job with GPU resources, they should be listed below (Tesla V100s 32GB).\n", + "Driver and CUDA version " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TorMtpwPv6RQ" + }, + "source": [ + "## Quick training run with a dummy model and data\n", + "more information on https://github.com/huggingface/transformers/tree/master/examples/pytorch/speech-recognition" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "fevoJD15u4Ss", + "outputId": "5861d34e-745b-45ee-e780-ed363043e655" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2022-01-31 17:29:51-- https://raw.githubusercontent.com/huggingface/transformers/master/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.108.133, 185.199.111.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 30360 (30K) [text/plain]\n", + "Saving to: ‘run_speech_recognition_ctc.py’\n", + "\n", + "run_speech_recognit 100%[===================>] 29.65K --.-KB/s in 0s \n", + "\n", + "2022-01-31 17:29:51 (58.1 MB/s) - ‘run_speech_recognition_ctc.py’ saved [30360/30360]\n", + "\n" + ] + } + ], + "source": [ + "!wget -O run_speech_recognition_ctc.py https://raw.githubusercontent.com/huggingface/transformers/master/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Mz4bubhxxsad", + "outputId": "23398525-cc19-43c2-9fec-497e06214f29" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "01/31/2022 17:33:12 - WARNING - __main__ - Process rank: -1, device: cuda:0, n_gpu: 1distributed training: False, 16-bits training: True\n", + "01/31/2022 17:33:12 - INFO - __main__ - Training/evaluation parameters TrainingArguments(\n", + "_n_gpu=1,\n", + "adafactor=False,\n", + "adam_beta1=0.9,\n", + "adam_beta2=0.999,\n", + "adam_epsilon=1e-08,\n", + "bf16=False,\n", + "bf16_full_eval=False,\n", + "dataloader_drop_last=False,\n", + "dataloader_num_workers=0,\n", + "dataloader_pin_memory=True,\n", + "ddp_bucket_cap_mb=None,\n", + "ddp_find_unused_parameters=None,\n", + "debug=[],\n", + "deepspeed=None,\n", + "disable_tqdm=False,\n", + "do_eval=True,\n", + "do_predict=False,\n", + "do_train=True,\n", + "eval_accumulation_steps=None,\n", + "eval_steps=5,\n", + "evaluation_strategy=IntervalStrategy.STEPS,\n", + "fp16=True,\n", + "fp16_backend=auto,\n", + "fp16_full_eval=False,\n", + "fp16_opt_level=O1,\n", + "gradient_accumulation_steps=4,\n", + "gradient_checkpointing=True,\n", + "greater_is_better=None,\n", + "group_by_length=True,\n", + "half_precision_backend=auto,\n", + "hub_model_id=None,\n", + "hub_strategy=HubStrategy.EVERY_SAVE,\n", + "hub_token=<HUB_TOKEN>,\n", + "ignore_data_skip=False,\n", + "label_names=None,\n", + "label_smoothing_factor=0.0,\n", + "learning_rate=7.5e-05,\n", + "length_column_name=input_length,\n", + "load_best_model_at_end=False,\n", + "local_rank=-1,\n", + "log_level=-1,\n", + "log_level_replica=-1,\n", + "log_on_each_node=True,\n", + "logging_dir=./runs/Jan31_17-33-12_job-6a6be32c-c82d-4385-805b-1f7606124d5b,\n", + "logging_first_step=False,\n", + "logging_nan_inf_filter=True,\n", + "logging_steps=1,\n", + "logging_strategy=IntervalStrategy.STEPS,\n", + "lr_scheduler_type=SchedulerType.LINEAR,\n", + "max_grad_norm=1.0,\n", + "max_steps=-1,\n", + "metric_for_best_model=None,\n", + "mp_parameters=,\n", + "no_cuda=False,\n", + "num_train_epochs=5.0,\n", + "optim=OptimizerNames.ADAMW_HF,\n", + "output_dir=./,\n", + "overwrite_output_dir=True,\n", + "past_index=-1,\n", + "per_device_eval_batch_size=8,\n", + "per_device_train_batch_size=8,\n", + "prediction_loss_only=False,\n", + "push_to_hub=True,\n", + "push_to_hub_model_id=None,\n", + "push_to_hub_organization=None,\n", + "push_to_hub_token=<PUSH_TO_HUB_TOKEN>,\n", + "remove_unused_columns=True,\n", + "report_to=[],\n", + "resume_from_checkpoint=None,\n", + "run_name=./,\n", + "save_on_each_node=False,\n", + "save_steps=5,\n", + "save_strategy=IntervalStrategy.STEPS,\n", + "save_total_limit=3,\n", + "seed=42,\n", + "sharded_ddp=[],\n", + "skip_memory_metrics=True,\n", + "tf32=None,\n", + "tpu_metrics_debug=False,\n", + "tpu_num_cores=None,\n", + "use_legacy_prediction_loop=False,\n", + "warmup_ratio=0.0,\n", + "warmup_steps=20,\n", + "weight_decay=0.0,\n", + 
"xpu_backend=None,\n", + ")\n", + "Downloading and preparing dataset common_voice/sv-SE to /workspace/.cache/huggingface/datasets/mozilla-foundation___common_voice/sv-SE/8.0.0/b8bc4d453193c06a43269b46cd87f075c70f152ac963b7f28f7a2760c45ec3e8...\n", + "Downloading: 100%|█████████████████████████| 1.11G/1.11G [00:57<00:00, 19.5MB/s]\n", + "Dataset common_voice downloaded and prepared to /workspace/.cache/huggingface/datasets/mozilla-foundation___common_voice/sv-SE/8.0.0/b8bc4d453193c06a43269b46cd87f075c70f152ac963b7f28f7a2760c45ec3e8. Subsequent calls will reuse this data.\n", + "01/31/2022 17:35:06 - WARNING - datasets.builder - Reusing dataset common_voice (/workspace/.cache/huggingface/datasets/mozilla-foundation___common_voice/sv-SE/8.0.0/b8bc4d453193c06a43269b46cd87f075c70f152ac963b7f28f7a2760c45ec3e8)\n", + "remove special characters from datasets: 11690ex [00:01, 6055.84ex/s]\n", + "remove special characters from datasets: 4843ex [00:00, 6185.81ex/s]\n", + "https://huggingface.co/facebook/wav2vec2-xls-r-300m/resolve/main/config.json not found in cache or force_download set to True, downloading to /workspace/.cache/huggingface/transformers/tmphwj4pp4l\n", + "Downloading: 100%|█████████████████████████| 1.53k/1.53k [00:00<00:00, 1.37MB/s]\n", + "storing https://huggingface.co/facebook/wav2vec2-xls-r-300m/resolve/main/config.json in cache at /workspace/.cache/huggingface/transformers/dabc27df63e37bd2a7a221c7774e35f36a280fbdf917cf54cadfc7df8c786f6f.a3e4c3c967d9985881e0ae550a5f6f668f897db5ab2e0802f9b97973b15970e6\n", + "creating metadata file for /workspace/.cache/huggingface/transformers/dabc27df63e37bd2a7a221c7774e35f36a280fbdf917cf54cadfc7df8c786f6f.a3e4c3c967d9985881e0ae550a5f6f668f897db5ab2e0802f9b97973b15970e6\n", + "loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-300m/resolve/main/config.json from cache at /workspace/.cache/huggingface/transformers/dabc27df63e37bd2a7a221c7774e35f36a280fbdf917cf54cadfc7df8c786f6f.a3e4c3c967d9985881e0ae550a5f6f668f897db5ab2e0802f9b97973b15970e6\n", + "Model config Wav2Vec2Config {\n", + " \"_name_or_path\": \"facebook/wav2vec2-xls-r-300m\",\n", + " \"activation_dropout\": 0.0,\n", + " \"adapter_kernel_size\": 3,\n", + " \"adapter_stride\": 2,\n", + " \"add_adapter\": false,\n", + " \"apply_spec_augment\": true,\n", + " \"architectures\": [\n", + " \"Wav2Vec2ForPreTraining\"\n", + " ],\n", + " \"attention_dropout\": 0.1,\n", + " \"bos_token_id\": 1,\n", + " \"classifier_proj_size\": 256,\n", + " \"codevector_dim\": 768,\n", + " \"contrastive_logits_temperature\": 0.1,\n", + " \"conv_bias\": true,\n", + " \"conv_dim\": [\n", + " 512,\n", + " 512,\n", + " 512,\n", + " 512,\n", + " 512,\n", + " 512,\n", + " 512\n", + " ],\n", + " \"conv_kernel\": [\n", + " 10,\n", + " 3,\n", + " 3,\n", + " 3,\n", + " 3,\n", + " 2,\n", + " 2\n", + " ],\n", + " \"conv_stride\": [\n", + " 5,\n", + " 2,\n", + " 2,\n", + " 2,\n", + " 2,\n", + " 2,\n", + " 2\n", + " ],\n", + " \"ctc_loss_reduction\": \"sum\",\n", + " \"ctc_zero_infinity\": false,\n", + " \"diversity_loss_weight\": 0.1,\n", + " \"do_stable_layer_norm\": true,\n", + " \"eos_token_id\": 2,\n", + " \"feat_extract_activation\": \"gelu\",\n", + " \"feat_extract_dropout\": 0.0,\n", + " \"feat_extract_norm\": \"layer\",\n", + " \"feat_proj_dropout\": 0.1,\n", + " \"feat_quantizer_dropout\": 0.0,\n", + " \"final_dropout\": 0.0,\n", + " \"gradient_checkpointing\": false,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout\": 0.1,\n", + " \"hidden_size\": 1024,\n", + " 
\"initializer_range\": 0.02,\n", + " \"intermediate_size\": 4096,\n", + " \"layer_norm_eps\": 1e-05,\n", + " \"layerdrop\": 0.1,\n", + " \"mask_feature_length\": 10,\n", + " \"mask_feature_min_masks\": 0,\n", + " \"mask_feature_prob\": 0.0,\n", + " \"mask_time_length\": 10,\n", + " \"mask_time_min_masks\": 2,\n", + " \"mask_time_prob\": 0.075,\n", + " \"model_type\": \"wav2vec2\",\n", + " \"num_adapter_layers\": 3,\n", + " \"num_attention_heads\": 16,\n", + " \"num_codevector_groups\": 2,\n", + " \"num_codevectors_per_group\": 320,\n", + " \"num_conv_pos_embedding_groups\": 16,\n", + " \"num_conv_pos_embeddings\": 128,\n", + " \"num_feat_extract_layers\": 7,\n", + " \"num_hidden_layers\": 24,\n", + " \"num_negatives\": 100,\n", + " \"output_hidden_size\": 1024,\n", + " \"pad_token_id\": 0,\n", + " \"proj_codevector_dim\": 768,\n", + " \"tdnn_dilation\": [\n", + " 1,\n", + " 2,\n", + " 3,\n", + " 1,\n", + " 1\n", + " ],\n", + " \"tdnn_dim\": [\n", + " 512,\n", + " 512,\n", + " 512,\n", + " 512,\n", + " 1500\n", + " ],\n", + " \"tdnn_kernel\": [\n", + " 5,\n", + " 3,\n", + " 3,\n", + " 1,\n", + " 1\n", + " ],\n", + " \"torch_dtype\": \"float32\",\n", + " \"transformers_version\": \"4.17.0.dev0\",\n", + " \"use_weighted_layer_sum\": false,\n", + " \"vocab_size\": 32,\n", + " \"xvector_output_dim\": 512\n", + "}\n", + "\n", + "100%|█████████████████████████████████████████████| 1/1 [00:00<00:00, 3.01ba/s]\n", + "100%|█████████████████████████████████████████████| 1/1 [00:00<00:00, 8.48ba/s]\n", + "Didn't find file ./tokenizer_config.json. We won't load it.\n", + "Didn't find file ./added_tokens.json. We won't load it.\n", + "Didn't find file ./special_tokens_map.json. We won't load it.\n", + "Didn't find file ./tokenizer.json. We won't load it.\n", + "loading file ./vocab.json\n", + "loading file None\n", + "loading file None\n", + "loading file None\n", + "loading file None\n", + "file ./config.json not found\n", + "Adding to the vocabulary\n", + "Adding to the vocabulary\n", + "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n", + "loading configuration file https://huggingface.co/facebook/wav2vec2-xls-r-300m/resolve/main/config.json from cache at /workspace/.cache/huggingface/transformers/dabc27df63e37bd2a7a221c7774e35f36a280fbdf917cf54cadfc7df8c786f6f.a3e4c3c967d9985881e0ae550a5f6f668f897db5ab2e0802f9b97973b15970e6\n", + "Model config Wav2Vec2Config {\n", + " \"_name_or_path\": \"facebook/wav2vec2-xls-r-300m\",\n", + " \"activation_dropout\": 0.0,\n", + " \"adapter_kernel_size\": 3,\n", + " \"adapter_stride\": 2,\n", + " \"add_adapter\": false,\n", + " \"apply_spec_augment\": true,\n", + " \"architectures\": [\n", + " \"Wav2Vec2ForPreTraining\"\n", + " ],\n", + " \"attention_dropout\": 0.1,\n", + " \"bos_token_id\": 1,\n", + " \"classifier_proj_size\": 256,\n", + " \"codevector_dim\": 768,\n", + " \"contrastive_logits_temperature\": 0.1,\n", + " \"conv_bias\": true,\n", + " \"conv_dim\": [\n", + " 512,\n", + " 512,\n", + " 512,\n", + " 512,\n", + " 512,\n", + " 512,\n", + " 512\n", + " ],\n", + " \"conv_kernel\": [\n", + " 10,\n", + " 3,\n", + " 3,\n", + " 3,\n", + " 3,\n", + " 2,\n", + " 2\n", + " ],\n", + " \"conv_stride\": [\n", + " 5,\n", + " 2,\n", + " 2,\n", + " 2,\n", + " 2,\n", + " 2,\n", + " 2\n", + " ],\n", + " \"ctc_loss_reduction\": \"sum\",\n", + " \"ctc_zero_infinity\": false,\n", + " \"diversity_loss_weight\": 0.1,\n", + " \"do_stable_layer_norm\": true,\n", + " \"eos_token_id\": 2,\n", + " 
\"feat_extract_activation\": \"gelu\",\n", + " \"feat_extract_dropout\": 0.0,\n", + " \"feat_extract_norm\": \"layer\",\n", + " \"feat_proj_dropout\": 0.1,\n", + " \"feat_quantizer_dropout\": 0.0,\n", + " \"final_dropout\": 0.0,\n", + " \"gradient_checkpointing\": false,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout\": 0.1,\n", + " \"hidden_size\": 1024,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 4096,\n", + " \"layer_norm_eps\": 1e-05,\n", + " \"layerdrop\": 0.1,\n", + " \"mask_feature_length\": 10,\n", + " \"mask_feature_min_masks\": 0,\n", + " \"mask_feature_prob\": 0.0,\n", + " \"mask_time_length\": 10,\n", + " \"mask_time_min_masks\": 2,\n", + " \"mask_time_prob\": 0.075,\n", + " \"model_type\": \"wav2vec2\",\n", + " \"num_adapter_layers\": 3,\n", + " \"num_attention_heads\": 16,\n", + " \"num_codevector_groups\": 2,\n", + " \"num_codevectors_per_group\": 320,\n", + " \"num_conv_pos_embedding_groups\": 16,\n", + " \"num_conv_pos_embeddings\": 128,\n", + " \"num_feat_extract_layers\": 7,\n", + " \"num_hidden_layers\": 24,\n", + " \"num_negatives\": 100,\n", + " \"output_hidden_size\": 1024,\n", + " \"pad_token_id\": 0,\n", + " \"proj_codevector_dim\": 768,\n", + " \"tdnn_dilation\": [\n", + " 1,\n", + " 2,\n", + " 3,\n", + " 1,\n", + " 1\n", + " ],\n", + " \"tdnn_dim\": [\n", + " 512,\n", + " 512,\n", + " 512,\n", + " 512,\n", + " 1500\n", + " ],\n", + " \"tdnn_kernel\": [\n", + " 5,\n", + " 3,\n", + " 3,\n", + " 1,\n", + " 1\n", + " ],\n", + " \"torch_dtype\": \"float32\",\n", + " \"transformers_version\": \"4.17.0.dev0\",\n", + " \"use_weighted_layer_sum\": false,\n", + " \"vocab_size\": 32,\n", + " \"xvector_output_dim\": 512\n", + "}\n", + "\n", + "https://huggingface.co/facebook/wav2vec2-xls-r-300m/resolve/main/preprocessor_config.json not found in cache or force_download set to True, downloading to /workspace/.cache/huggingface/transformers/tmp7kx5wcin\n", + "Downloading: 100%|██████████████████████████████| 212/212 [00:00<00:00, 239kB/s]\n", + "storing https://huggingface.co/facebook/wav2vec2-xls-r-300m/resolve/main/preprocessor_config.json in cache at /workspace/.cache/huggingface/transformers/6fb028b95b394059e7d3b367bbca2382b576c66aebe896f04d2cd34e1b575f5b.d4484dc1c81456a2461485e7168b04347a7b9a4e3b1ef3aba723323b33e12326\n", + "creating metadata file for /workspace/.cache/huggingface/transformers/6fb028b95b394059e7d3b367bbca2382b576c66aebe896f04d2cd34e1b575f5b.d4484dc1c81456a2461485e7168b04347a7b9a4e3b1ef3aba723323b33e12326\n", + "loading feature extractor configuration file https://huggingface.co/facebook/wav2vec2-xls-r-300m/resolve/main/preprocessor_config.json from cache at /workspace/.cache/huggingface/transformers/6fb028b95b394059e7d3b367bbca2382b576c66aebe896f04d2cd34e1b575f5b.d4484dc1c81456a2461485e7168b04347a7b9a4e3b1ef3aba723323b33e12326\n", + "Feature extractor Wav2Vec2FeatureExtractor {\n", + " \"do_normalize\": true,\n", + " \"feature_extractor_type\": \"Wav2Vec2FeatureExtractor\",\n", + " \"feature_size\": 1,\n", + " \"padding_side\": \"right\",\n", + " \"padding_value\": 0,\n", + " \"return_attention_mask\": true,\n", + " \"sampling_rate\": 16000\n", + "}\n", + "\n", + "https://huggingface.co/facebook/wav2vec2-xls-r-300m/resolve/main/pytorch_model.bin not found in cache or force_download set to True, downloading to /workspace/.cache/huggingface/transformers/tmppk1v7kfg\n", + "Downloading: 100%|█████████████████████████| 1.18G/1.18G [00:14<00:00, 89.1MB/s]\n", + "storing 
https://huggingface.co/facebook/wav2vec2-xls-r-300m/resolve/main/pytorch_model.bin in cache at /workspace/.cache/huggingface/transformers/1e6a6507f3b689035cd4b247e2a37c154e27f39143f31357a49b4e38baeccc36.1edb32803799e27ed554eb7dd935f6745b1a0b17b0ea256442fe24db6eb546cd\n", + "creating metadata file for /workspace/.cache/huggingface/transformers/1e6a6507f3b689035cd4b247e2a37c154e27f39143f31357a49b4e38baeccc36.1edb32803799e27ed554eb7dd935f6745b1a0b17b0ea256442fe24db6eb546cd\n", + "loading weights file https://huggingface.co/facebook/wav2vec2-xls-r-300m/resolve/main/pytorch_model.bin from cache at /workspace/.cache/huggingface/transformers/1e6a6507f3b689035cd4b247e2a37c154e27f39143f31357a49b4e38baeccc36.1edb32803799e27ed554eb7dd935f6745b1a0b17b0ea256442fe24db6eb546cd\n", + "Some weights of the model checkpoint at facebook/wav2vec2-xls-r-300m were not used when initializing Wav2Vec2ForCTC: ['quantizer.codevectors', 'project_hid.weight', 'quantizer.weight_proj.bias', 'project_q.weight', 'quantizer.weight_proj.weight', 'project_hid.bias', 'project_q.bias']\n", + "- This IS expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-300m and are newly initialized: ['lm_head.weight', 'lm_head.bias']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", + "preprocess datasets: 11690ex [01:15, 154.39ex/s]\n", + "preprocess datasets: 4843ex [00:33, 146.23ex/s]\n", + "100%|██████████████████████████████████████████| 12/12 [00:00<00:00, 734.35ba/s]\n", + "100%|████████████████████████████████████████████| 5/5 [00:00<00:00, 734.50ba/s]\n", + "Configuration saved in ./preprocessor_config.json\n", + "tokenizer config file saved in ./tokenizer_config.json\n", + "Special tokens file saved in ./special_tokens_map.json\n", + "added tokens file saved in ./added_tokens.json\n", + "Configuration saved in ./config.json\n", + "loading feature extractor configuration file ./preprocessor_config.json\n", + "loading configuration file ./config.json\n", + "Model config Wav2Vec2Config {\n", + " \"_name_or_path\": \"./\",\n", + " \"activation_dropout\": 0.1,\n", + " \"adapter_kernel_size\": 3,\n", + " \"adapter_stride\": 2,\n", + " \"add_adapter\": false,\n", + " \"apply_spec_augment\": true,\n", + " \"architectures\": [\n", + " \"Wav2Vec2ForPreTraining\"\n", + " ],\n", + " \"attention_dropout\": 0.0,\n", + " \"bos_token_id\": 1,\n", + " \"classifier_proj_size\": 256,\n", + " \"codevector_dim\": 768,\n", + " \"contrastive_logits_temperature\": 0.1,\n", + " \"conv_bias\": true,\n", + " \"conv_dim\": [\n", + " 512,\n", + " 512,\n", + " 512,\n", + " 512,\n", + " 512,\n", + " 512,\n", + " 512\n", + " ],\n", + " \"conv_kernel\": [\n", + " 10,\n", + " 3,\n", + " 3,\n", + " 3,\n", + " 3,\n", + " 2,\n", + " 2\n", + " ],\n", + " \"conv_stride\": [\n", + " 5,\n", + " 2,\n", + " 2,\n", + " 2,\n", + " 2,\n", + " 2,\n", + " 2\n", + " ],\n", + " \"ctc_loss_reduction\": \"mean\",\n", + " \"ctc_zero_infinity\": false,\n", + " \"diversity_loss_weight\": 0.1,\n", + " 
\"do_stable_layer_norm\": true,\n", + " \"eos_token_id\": 2,\n", + " \"feat_extract_activation\": \"gelu\",\n", + " \"feat_extract_dropout\": 0.0,\n", + " \"feat_extract_norm\": \"layer\",\n", + " \"feat_proj_dropout\": 0.0,\n", + " \"feat_quantizer_dropout\": 0.0,\n", + " \"final_dropout\": 0.0,\n", + " \"hidden_act\": \"gelu\",\n", + " \"hidden_dropout\": 0.0,\n", + " \"hidden_size\": 1024,\n", + " \"initializer_range\": 0.02,\n", + " \"intermediate_size\": 4096,\n", + " \"layer_norm_eps\": 1e-05,\n", + " \"layerdrop\": 0.0,\n", + " \"mask_feature_length\": 64,\n", + " \"mask_feature_min_masks\": 0,\n", + " \"mask_feature_prob\": 0.25,\n", + " \"mask_time_length\": 10,\n", + " \"mask_time_min_masks\": 2,\n", + " \"mask_time_prob\": 0.75,\n", + " \"model_type\": \"wav2vec2\",\n", + " \"num_adapter_layers\": 3,\n", + " \"num_attention_heads\": 16,\n", + " \"num_codevector_groups\": 2,\n", + " \"num_codevectors_per_group\": 320,\n", + " \"num_conv_pos_embedding_groups\": 16,\n", + " \"num_conv_pos_embeddings\": 128,\n", + " \"num_feat_extract_layers\": 7,\n", + " \"num_hidden_layers\": 24,\n", + " \"num_negatives\": 100,\n", + " \"output_hidden_size\": 1024,\n", + " \"pad_token_id\": 34,\n", + " \"proj_codevector_dim\": 768,\n", + " \"tdnn_dilation\": [\n", + " 1,\n", + " 2,\n", + " 3,\n", + " 1,\n", + " 1\n", + " ],\n", + " \"tdnn_dim\": [\n", + " 512,\n", + " 512,\n", + " 512,\n", + " 512,\n", + " 1500\n", + " ],\n", + " \"tdnn_kernel\": [\n", + " 5,\n", + " 3,\n", + " 3,\n", + " 1,\n", + " 1\n", + " ],\n", + " \"torch_dtype\": \"float32\",\n", + " \"transformers_version\": \"4.17.0.dev0\",\n", + " \"use_weighted_layer_sum\": false,\n", + " \"vocab_size\": 37,\n", + " \"xvector_output_dim\": 512\n", + "}\n", + "\n", + "loading feature extractor configuration file ./preprocessor_config.json\n", + "Feature extractor Wav2Vec2FeatureExtractor {\n", + " \"do_normalize\": true,\n", + " \"feature_extractor_type\": \"Wav2Vec2FeatureExtractor\",\n", + " \"feature_size\": 1,\n", + " \"padding_side\": \"right\",\n", + " \"padding_value\": 0,\n", + " \"return_attention_mask\": true,\n", + " \"sampling_rate\": 16000\n", + "}\n", + "\n", + "Didn't find file ./tokenizer.json. We won't load it.\n", + "loading file ./vocab.json\n", + "loading file ./tokenizer_config.json\n", + "loading file ./added_tokens.json\n", + "loading file ./special_tokens_map.json\n", + "loading file None\n", + "Adding to the vocabulary\n", + "Adding to the vocabulary\n", + "/workspace/xls-r-300m-sv-cv8/./ is already a clone of https://huggingface.co/masapasa/xls-r-300m-sv-cv8. Make sure you pull the latest changes with `repo.git_pull()`.\n", + "01/31/2022 17:37:26 - WARNING - huggingface_hub.repository - /workspace/xls-r-300m-sv-cv8/./ is already a clone of https://huggingface.co/masapasa/xls-r-300m-sv-cv8. Make sure you pull the latest changes with `repo.git_pull()`.\n", + "Using amp half precision backend\n", + "The following columns in the training set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "/opt/conda/lib/python3.8/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. 
Use thePyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n", + " warnings.warn(\n", + "***** Running training *****\n", + " Num examples = 11690\n", + " Num Epochs = 5\n", + " Instantaneous batch size per device = 8\n", + " Total train batch size (w. parallel, distributed & accumulation) = 32\n", + " Gradient Accumulation steps = 4\n", + " Total optimization steps = 1825\n", + "{'loss': 11.6636, 'learning_rate': 3.7499999999999997e-06, 'epoch': 0.0} \n", + "{'loss': 10.7654, 'learning_rate': 7.499999999999999e-06, 'epoch': 0.01} \n", + "{'loss': 10.5427, 'learning_rate': 1.1249999999999999e-05, 'epoch': 0.01} \n", + "{'loss': 11.7261, 'learning_rate': 1.4999999999999999e-05, 'epoch': 0.01} \n", + "{'loss': 10.7838, 'learning_rate': 1.875e-05, 'epoch': 0.01} \n", + " 0%| | 5/1825 [00:09<54:59, 1.81s/it]The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 4843\n", + " Batch size = 8\n", + "\n", + " 0%| | 0/606 [00:00