diff --git "a/train.ipynb" "b/train.ipynb" new file mode 100644--- /dev/null +++ "b/train.ipynb" @@ -0,0 +1,1818 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "1a08ff40", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using custom data configuration default-8bef3afded73c387\n", + "Reusing dataset json (/workspace/.cache/huggingface/datasets/json/default-8bef3afded73c387/0.0.0/ac0ca5f5289a6cf108e706efcf040422dbbfa8e658dee6a819f20d76bb84d26b)\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "19b2e8b501604f2daa38a71c2cffd8de", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/1 [00:00\n", + " \n", + " Your browser does not support the audio element.\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import IPython.display as ipd\n", + "import numpy as np\n", + "import random\n", + "\n", + "rand_int = random.randint(0, len(all)-1)\n", + "\n", + "print(all[rand_int][\"sentence\"])\n", + "ipd.Audio(data=all[rand_int][\"audio\"][\"array\"], autoplay=True, rate=16000)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "1d66bd44", + "metadata": { + "id": "eJY7I0XAwe9p" + }, + "outputs": [], + "source": [ + "def prepare_dataset(batch):\n", + " audio = batch[\"audio\"]\n", + "\n", + " # batched output is \"un-batched\"\n", + " batch[\"input_values\"] = processor(audio[\"array\"], sampling_rate=audio[\"sampling_rate\"]).input_values[0]\n", + " batch[\"input_length\"] = len(batch[\"input_values\"])\n", + " \n", + " with processor.as_target_processor():\n", + " batch[\"labels\"] = processor(batch[\"sentence\"]).input_ids\n", + " return batch" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "f5360bdd", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 
81, + "referenced_widgets": [ + "c47ea368dd08403aa09b2bafdbb4b580", + "e77cf973d5824ae7b89bafd814805c2a", + "071b7647e1fe49609a48e4281a9efd0f", + "c97c00fcf2e64f18b637337f9244d748", + "9ca82fa27d1043e9ac9f10301e0b33bc", + "cc6c7e9931c140db8ba7a977c4461ce5", + "d207784bda7e4dd8858170f470ae2833", + "0800fef7de6e45d380873f974882d67e", + "926440595aa44c698588e02b86eb8c4c", + "ea2806c776384f1a90e36b72c2c17a44", + "6b72385c07134782995fcd76e675da7c", + "3653b92c9f2a408eac253e1d5153daf4", + "73ffd9b8166c4ec78ff2b62d17690327", + "6b133a1e11e44f68846ff931446559cf", + "7c98818547c84af7ba9284bc20101691", + "41b501a16b2a4f709197af5cdd5227cb", + "3b4fbe2916894e48b8f93ca63e203aca", + "c002386685c0413d8181b054d3f9d49f", + "cfb70829b5e1461abcb01872b74a194c", + "ed943db2b5274022a606ce4103d54425", + "cfb242eb549c4e66afcedefb575b4e38", + "a0313055d29f4a60837e59ac4d8a3870" + ] + }, + "id": "-np9xYK-wl8q", + "outputId": "573f6f67-e5b2-4977-a564-3919e7903592" + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "c97a52c72198489f89a4481f722ac35a", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "0ex [00:00, ?ex/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading cached processed dataset at /workspace/.cache/huggingface/datasets/mozilla-foundation___common_voice/cs/8.0.0/b8bc4d453193c06a43269b46cd87f075c70f152ac963b7f28f7a2760c45ec3e8/cache-fcc378c48562cf8c.arrow\n" + ] + } + ], + "source": [ + "all = all.map(prepare_dataset, remove_columns=all.column_names)\n", + "common_voice_test = common_voice_test.map(prepare_dataset, remove_columns=common_voice_test.column_names)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "5e8bb4ee", + "metadata": { + "id": "tborvC9hx88e" + }, + "outputs": [], + "source": [ + "import torch\n", + "\n", + "from dataclasses import dataclass, field\n", + "from typing import Any, Dict, List, 
Optional, Union\n", + "\n", + "@dataclass\n", + "class DataCollatorCTCWithPadding:\n", + " \"\"\"\n", + " Data collator that will dynamically pad the inputs received.\n", + " Args:\n", + " processor (:class:`~transformers.Wav2Vec2Processor`)\n", + " The processor used for processing the data.\n", + " padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`):\n", + " Select a strategy to pad the returned sequences (according to the model's padding side and padding index)\n", + " among:\n", + " * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single\n", + " sequence is provided).\n", + " * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the\n", + " maximum acceptable input length for the model if that argument is not provided.\n", + " * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of\n", + " different lengths).\n", + " \"\"\"\n", + "\n", + " processor: Wav2Vec2Processor\n", + " padding: Union[bool, str] = True\n", + "\n", + " def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:\n", + " # split inputs and labels since they have to be of different lengths and need\n", + " # different padding methods\n", + " input_features = [{\"input_values\": feature[\"input_values\"]} for feature in features]\n", + " label_features = [{\"input_ids\": feature[\"labels\"]} for feature in features]\n", + "\n", + " batch = self.processor.pad(\n", + " input_features,\n", + " padding=self.padding,\n", + " return_tensors=\"pt\",\n", + " )\n", + " with self.processor.as_target_processor():\n", + " labels_batch = self.processor.pad(\n", + " label_features,\n", + " padding=self.padding,\n", + " return_tensors=\"pt\",\n", + " )\n", + "\n", + " # replace padding with -100 to ignore loss correctly\n", + " labels = 
labels_batch[\"input_ids\"].masked_fill(labels_batch.attention_mask.ne(1), -100)\n", + "\n", + " batch[\"labels\"] = labels\n", + "\n", + " return batch" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "98dfd52e", + "metadata": { + "id": "lbQf5GuZyQ4_" + }, + "outputs": [], + "source": [ + "data_collator = DataCollatorCTCWithPadding(processor=processor, padding=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "5efc8697", + "metadata": { + "id": "9Xsux2gmyXso" + }, + "outputs": [], + "source": [ + "from datasets import load_metric\n", + "\n", + "wer_metric = load_metric(\"wer\")\n", + "cer_metric = load_metric(\"cer\")\n", + "metrics = [wer_metric, cer_metric]" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "ec29ec29", + "metadata": { + "id": "1XZ-kjweyTy_" + }, + "outputs": [], + "source": [ + "def compute_metrics(pred):\n", + " pred_logits = pred.predictions\n", + " pred_ids = np.argmax(pred_logits, axis=-1)\n", + "\n", + " pred.label_ids[pred.label_ids == -100] = processor.tokenizer.pad_token_id\n", + "\n", + " pred_str = processor.batch_decode(pred_ids)\n", + " # we do not want to group tokens when computing the metrics\n", + " label_str = processor.batch_decode(pred.label_ids, group_tokens=False)\n", + "\n", + " wer = wer_metric.compute(predictions=pred_str, references=label_str)\n", + " cer = cer_metric.compute(predictions=pred_str, references=label_str)\n", + "\n", + " return {\"wer\": wer, \"cer\": cer}" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "d6d68f86", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "e7cqAWIayn6w", + "outputId": "7a7ef020-bc8f-41e2-846c-645be598312e" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Some weights of the model checkpoint at facebook/wav2vec2-xls-r-300m were not used when initializing Wav2Vec2ForCTC: ['project_hid.weight', 'quantizer.weight_proj.bias', 
'quantizer.codevectors', 'quantizer.weight_proj.weight', 'project_q.bias', 'project_hid.bias', 'project_q.weight']\n", + "- This IS expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-300m and are newly initialized: ['lm_head.weight', 'lm_head.bias']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" + ] + } + ], + "source": [ + "from transformers import Wav2Vec2ForCTC\n", + "\n", + "model = Wav2Vec2ForCTC.from_pretrained(\n", + " #\"comodoro/wav2vec2-xls-r-300m-cs-cv8\", \n", + " \"facebook/wav2vec2-xls-r-300m\", \n", + " attention_dropout=0.1,\n", + " hidden_dropout=0.1,\n", + " feat_proj_dropout=0.0,\n", + " mask_time_prob=0.1,\n", + " layerdrop=0.1,\n", + " ctc_loss_reduction=\"mean\", \n", + " pad_token_id=processor.tokenizer.pad_token_id,\n", + " vocab_size=len(processor.tokenizer),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "23d91592", + "metadata": { + "id": "oGI8zObtZ3V0" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/workspace/.local/lib/python3.8/site-packages/transformers/models/wav2vec2/modeling_wav2vec2.py:1700: FutureWarning: The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5.Please use the equivalent `freeze_feature_encoder` method instead.\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "model.freeze_feature_extractor()" + ] + }, + { + "cell_type": 
"code", + "execution_count": 30, + "id": "bf112a3a", + "metadata": { + "id": "KbeKSV7uzGPP" + }, + "outputs": [], + "source": [ + "from transformers import TrainingArguments\n", + "\n", + "training_args = TrainingArguments(\n", + " output_dir=repo_name,\n", + " group_by_length=True,\n", + " per_device_train_batch_size=16,\n", + " gradient_accumulation_steps=1,\n", + " eval_accumulation_steps=1,\n", + " evaluation_strategy=\"steps\",\n", + " num_train_epochs=50,\n", + " gradient_checkpointing=True,\n", + " fp16=True,\n", + " save_steps=800,\n", + " eval_steps=800,\n", + " logging_steps=250,\n", + " learning_rate=1e-5,\n", + " warmup_steps=600,\n", + " save_total_limit=2,\n", + " report_to=\"tensorboard\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "6d209cae", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "rY7vBmFCPFgC", + "outputId": "a180bf3f-f798-4947-ff58-207d7aaab695" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using amp half precision backend\n" + ] + } + ], + "source": [ + "from transformers import Trainer\n", + "\n", + "trainer = Trainer(\n", + " model=model,\n", + " data_collator=data_collator,\n", + " args=training_args,\n", + " compute_metrics=compute_metrics,\n", + " train_dataset=all,\n", + " eval_dataset=common_voice_test,\n", + " tokenizer=processor.feature_extractor,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "350ccf96", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 312 + }, + "id": "9fRr9TG5pGBl", + "outputId": "8bdf1d11-bca1-46af-db67-518f85586f7a" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "The following columns in the training set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "/workspace/.local/lib/python3.8/site-packages/transformers/optimization.py:306: 
FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use thePyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n", + " warnings.warn(\n", + "***** Running training *****\n", + " Num examples = 159605\n", + " Num Epochs = 50\n", + " Instantaneous batch size per device = 16\n", + " Total train batch size (w. parallel, distributed & accumulation) = 16\n", + " Gradient Accumulation steps = 1\n", + " Total optimization steps = 498800\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + " \n", + " \n", + " [ 31733/498800 13:51:39 < 204:01:41, 0.64 it/s, Epoch 3.18/50]\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StepTraining LossValidation LossWerCer
8004.7500003.7664501.0000001.000000
16003.4798003.3611681.0000001.000000
24003.3317003.3014821.0000001.000000
32003.1973002.5847650.9998020.856619
40001.8074000.9086140.6408530.151860
48001.3421000.5504200.4324500.095473
56001.1327000.3940870.3477810.073488
64000.9993000.3231760.2938040.061316
72000.9281000.2731870.2630140.054848
80000.8976000.2446860.2475100.051767
88000.8134000.2271930.2363620.049280
96000.7758000.2114540.2257690.047510
104000.7572000.2005280.2165220.045622
112000.7297000.1940560.2105820.044185
120000.6983000.1895580.2056710.043360
128000.6818000.1809870.1984440.042125
136000.6446000.1762660.1916120.040599
144000.6454000.1675500.1889990.040012
152000.6461000.1660650.1823260.038584
160000.6483000.1650560.1809200.038490
168000.6226000.1585420.1785830.038353
176000.6024000.1525480.1744850.037284
184000.6159000.1534700.1743660.037303
192000.5873000.1495510.1695940.036240
200000.5823000.1475520.1695940.036146
208000.5414000.1456850.1677920.035813
216000.5711000.1461350.1674550.036029
224000.5650000.1428860.1646240.035194
232000.5555000.1410500.1629010.034871
240000.5226000.1409780.1620500.034359
248000.5350000.1377780.1605850.035011
256000.5525000.1364050.1591190.034666
264000.5190000.1385750.1573970.034004
272000.5227000.1325740.1590600.034317
280000.4931000.1338620.1557530.033766
288000.4949000.1322230.1547230.033183
296000.5091000.1313100.1535350.033147
304000.4879000.1289670.1507040.032260
312000.5005000.1304140.1516540.032628

" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 7267\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-xls-r-300m-cs-250/checkpoint-800\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-800/config.json\n", + "Model weights saved in wav2vec2-xls-r-300m-cs-250/checkpoint-800/pytorch_model.bin\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-800/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-xls-r-300m-cs-250/checkpoint-4000] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 7267\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-xls-r-300m-cs-250/checkpoint-1600\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-1600/config.json\n", + "Model weights saved in wav2vec2-xls-r-300m-cs-250/checkpoint-1600/pytorch_model.bin\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-1600/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-xls-r-300m-cs-250/checkpoint-4800] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 7267\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-xls-r-300m-cs-250/checkpoint-2400\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-2400/config.json\n", + "Model weights saved in 
wav2vec2-xls-r-300m-cs-250/checkpoint-2400/pytorch_model.bin\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-2400/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-xls-r-300m-cs-250/checkpoint-800] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 7267\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-xls-r-300m-cs-250/checkpoint-3200\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-3200/config.json\n", + "Model weights saved in wav2vec2-xls-r-300m-cs-250/checkpoint-3200/pytorch_model.bin\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-3200/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-xls-r-300m-cs-250/checkpoint-1600] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 7267\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-xls-r-300m-cs-250/checkpoint-4000\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-4000/config.json\n", + "Model weights saved in wav2vec2-xls-r-300m-cs-250/checkpoint-4000/pytorch_model.bin\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-4000/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-xls-r-300m-cs-250/checkpoint-2400] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 7267\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-xls-r-300m-cs-250/checkpoint-4800\n", + 
"Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-4800/config.json\n", + "Model weights saved in wav2vec2-xls-r-300m-cs-250/checkpoint-4800/pytorch_model.bin\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-4800/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-xls-r-300m-cs-250/checkpoint-3200] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 7267\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-xls-r-300m-cs-250/checkpoint-5600\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-5600/config.json\n", + "Model weights saved in wav2vec2-xls-r-300m-cs-250/checkpoint-5600/pytorch_model.bin\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-5600/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-xls-r-300m-cs-250/checkpoint-4000] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 7267\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-xls-r-300m-cs-250/checkpoint-6400\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-6400/config.json\n", + "Model weights saved in wav2vec2-xls-r-300m-cs-250/checkpoint-6400/pytorch_model.bin\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-6400/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-xls-r-300m-cs-250/checkpoint-4800] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 7267\n", + " 
Batch size = 8\n", + "Saving model checkpoint to wav2vec2-xls-r-300m-cs-250/checkpoint-7200\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-7200/config.json\n", + "Model weights saved in wav2vec2-xls-r-300m-cs-250/checkpoint-7200/pytorch_model.bin\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-7200/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-xls-r-300m-cs-250/checkpoint-5600] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 7267\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-xls-r-300m-cs-250/checkpoint-8000\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-8000/config.json\n", + "Model weights saved in wav2vec2-xls-r-300m-cs-250/checkpoint-8000/pytorch_model.bin\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-8000/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-xls-r-300m-cs-250/checkpoint-6400] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 7267\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-xls-r-300m-cs-250/checkpoint-8800\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-8800/config.json\n", + "Model weights saved in wav2vec2-xls-r-300m-cs-250/checkpoint-8800/pytorch_model.bin\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-8800/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-xls-r-300m-cs-250/checkpoint-7200] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been 
ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 7267\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-xls-r-300m-cs-250/checkpoint-9600\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-9600/config.json\n", + "Model weights saved in wav2vec2-xls-r-300m-cs-250/checkpoint-9600/pytorch_model.bin\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-9600/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-xls-r-300m-cs-250/checkpoint-8000] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 7267\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-xls-r-300m-cs-250/checkpoint-10400\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-10400/config.json\n", + "Model weights saved in wav2vec2-xls-r-300m-cs-250/checkpoint-10400/pytorch_model.bin\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-10400/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-xls-r-300m-cs-250/checkpoint-8800] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 7267\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-xls-r-300m-cs-250/checkpoint-11200\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-11200/config.json\n", + "Model weights saved in wav2vec2-xls-r-300m-cs-250/checkpoint-11200/pytorch_model.bin\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-11200/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-xls-r-300m-cs-250/checkpoint-9600] due to args.save_total_limit\n", + "The following 
columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 7267\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-xls-r-300m-cs-250/checkpoint-12000\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-12000/config.json\n", + "Model weights saved in wav2vec2-xls-r-300m-cs-250/checkpoint-12000/pytorch_model.bin\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-12000/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-xls-r-300m-cs-250/checkpoint-10400] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 7267\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-xls-r-300m-cs-250/checkpoint-12800\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-12800/config.json\n", + "Model weights saved in wav2vec2-xls-r-300m-cs-250/checkpoint-12800/pytorch_model.bin\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-12800/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-xls-r-300m-cs-250/checkpoint-11200] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 7267\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-xls-r-300m-cs-250/checkpoint-13600\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-13600/config.json\n", + "Model weights saved in wav2vec2-xls-r-300m-cs-250/checkpoint-13600/pytorch_model.bin\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-13600/preprocessor_config.json\n", + "Deleting 
older checkpoint [wav2vec2-xls-r-300m-cs-250/checkpoint-12000] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 7267\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-xls-r-300m-cs-250/checkpoint-14400\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-14400/config.json\n", + "Model weights saved in wav2vec2-xls-r-300m-cs-250/checkpoint-14400/pytorch_model.bin\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-14400/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-xls-r-300m-cs-250/checkpoint-12800] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 7267\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-xls-r-300m-cs-250/checkpoint-15200\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-15200/config.json\n", + "Model weights saved in wav2vec2-xls-r-300m-cs-250/checkpoint-15200/pytorch_model.bin\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-15200/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-xls-r-300m-cs-250/checkpoint-13600] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 7267\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-xls-r-300m-cs-250/checkpoint-16000\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-16000/config.json\n", + "Model weights saved in 
wav2vec2-xls-r-300m-cs-250/checkpoint-16000/pytorch_model.bin\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-16000/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-xls-r-300m-cs-250/checkpoint-14400] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 7267\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-xls-r-300m-cs-250/checkpoint-16800\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-16800/config.json\n", + "Model weights saved in wav2vec2-xls-r-300m-cs-250/checkpoint-16800/pytorch_model.bin\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-16800/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-xls-r-300m-cs-250/checkpoint-15200] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 7267\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-xls-r-300m-cs-250/checkpoint-17600\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-17600/config.json\n", + "Model weights saved in wav2vec2-xls-r-300m-cs-250/checkpoint-17600/pytorch_model.bin\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-17600/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-xls-r-300m-cs-250/checkpoint-16000] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 7267\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-xls-r-300m-cs-250/checkpoint-18400\n", 
+ "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-18400/config.json\n", + "Model weights saved in wav2vec2-xls-r-300m-cs-250/checkpoint-18400/pytorch_model.bin\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-18400/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-xls-r-300m-cs-250/checkpoint-16800] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 7267\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-xls-r-300m-cs-250/checkpoint-19200\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-19200/config.json\n", + "Model weights saved in wav2vec2-xls-r-300m-cs-250/checkpoint-19200/pytorch_model.bin\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-19200/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-xls-r-300m-cs-250/checkpoint-17600] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 7267\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-xls-r-300m-cs-250/checkpoint-20000\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-20000/config.json\n", + "Model weights saved in wav2vec2-xls-r-300m-cs-250/checkpoint-20000/pytorch_model.bin\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-20000/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-xls-r-300m-cs-250/checkpoint-18400] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples 
= 7267\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-xls-r-300m-cs-250/checkpoint-20800\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-20800/config.json\n", + "Model weights saved in wav2vec2-xls-r-300m-cs-250/checkpoint-20800/pytorch_model.bin\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-20800/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-xls-r-300m-cs-250/checkpoint-19200] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 7267\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-xls-r-300m-cs-250/checkpoint-21600\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-21600/config.json\n", + "Model weights saved in wav2vec2-xls-r-300m-cs-250/checkpoint-21600/pytorch_model.bin\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-21600/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-xls-r-300m-cs-250/checkpoint-20000] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 7267\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-xls-r-300m-cs-250/checkpoint-22400\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-22400/config.json\n", + "Model weights saved in wav2vec2-xls-r-300m-cs-250/checkpoint-22400/pytorch_model.bin\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-22400/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-xls-r-300m-cs-250/checkpoint-20800] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in 
`Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 7267\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-xls-r-300m-cs-250/checkpoint-23200\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-23200/config.json\n", + "Model weights saved in wav2vec2-xls-r-300m-cs-250/checkpoint-23200/pytorch_model.bin\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-23200/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-xls-r-300m-cs-250/checkpoint-21600] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 7267\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-xls-r-300m-cs-250/checkpoint-24000\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-24000/config.json\n", + "Model weights saved in wav2vec2-xls-r-300m-cs-250/checkpoint-24000/pytorch_model.bin\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-24000/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-xls-r-300m-cs-250/checkpoint-22400] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 7267\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-xls-r-300m-cs-250/checkpoint-24800\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-24800/config.json\n", + "Model weights saved in wav2vec2-xls-r-300m-cs-250/checkpoint-24800/pytorch_model.bin\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-24800/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-xls-r-300m-cs-250/checkpoint-23200] due to 
args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 7267\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-xls-r-300m-cs-250/checkpoint-25600\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-25600/config.json\n", + "Model weights saved in wav2vec2-xls-r-300m-cs-250/checkpoint-25600/pytorch_model.bin\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-25600/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-xls-r-300m-cs-250/checkpoint-24000] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 7267\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-xls-r-300m-cs-250/checkpoint-26400\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-26400/config.json\n", + "Model weights saved in wav2vec2-xls-r-300m-cs-250/checkpoint-26400/pytorch_model.bin\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-26400/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-xls-r-300m-cs-250/checkpoint-24800] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 7267\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-xls-r-300m-cs-250/checkpoint-27200\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-27200/config.json\n", + "Model weights saved in wav2vec2-xls-r-300m-cs-250/checkpoint-27200/pytorch_model.bin\n", + "Configuration saved in 
wav2vec2-xls-r-300m-cs-250/checkpoint-27200/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-xls-r-300m-cs-250/checkpoint-25600] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 7267\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-xls-r-300m-cs-250/checkpoint-28000\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-28000/config.json\n", + "Model weights saved in wav2vec2-xls-r-300m-cs-250/checkpoint-28000/pytorch_model.bin\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-28000/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-xls-r-300m-cs-250/checkpoint-26400] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 7267\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-xls-r-300m-cs-250/checkpoint-28800\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-28800/config.json\n", + "Model weights saved in wav2vec2-xls-r-300m-cs-250/checkpoint-28800/pytorch_model.bin\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-28800/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-xls-r-300m-cs-250/checkpoint-27200] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 7267\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-xls-r-300m-cs-250/checkpoint-29600\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-29600/config.json\n", + 
"Model weights saved in wav2vec2-xls-r-300m-cs-250/checkpoint-29600/pytorch_model.bin\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-29600/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-xls-r-300m-cs-250/checkpoint-28000] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 7267\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-xls-r-300m-cs-250/checkpoint-30400\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-30400/config.json\n", + "Model weights saved in wav2vec2-xls-r-300m-cs-250/checkpoint-30400/pytorch_model.bin\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-30400/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-xls-r-300m-cs-250/checkpoint-28800] due to args.save_total_limit\n", + "The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.\n", + "***** Running Evaluation *****\n", + " Num examples = 7267\n", + " Batch size = 8\n", + "Saving model checkpoint to wav2vec2-xls-r-300m-cs-250/checkpoint-31200\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-31200/config.json\n", + "Model weights saved in wav2vec2-xls-r-300m-cs-250/checkpoint-31200/pytorch_model.bin\n", + "Configuration saved in wav2vec2-xls-r-300m-cs-250/checkpoint-31200/preprocessor_config.json\n", + "Deleting older checkpoint [wav2vec2-xls-r-300m-cs-250/checkpoint-29600] due to args.save_total_limit\n" + ] + }, + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + 
"\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtrainer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/.local/lib/python3.8/site-packages/transformers/trainer.py\u001b[0m in \u001b[0;36mtrain\u001b[0;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m 1337\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1338\u001b[0m \u001b[0mstep\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1339\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mstep\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mepoch_iterator\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1340\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1341\u001b[0m \u001b[0;31m# Skip past any already trained steps if resuming training\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/opt/conda/lib/python3.8/site-packages/torch/utils/data/dataloader.py\u001b[0m in \u001b[0;36m__next__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 519\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sampler_iter\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 520\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_reset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 521\u001b[0;31m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_next_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 522\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_num_yielded\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 523\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_dataset_kind\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0m_DatasetKind\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mIterable\u001b[0m \u001b[0;32mand\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/opt/conda/lib/python3.8/site-packages/torch/utils/data/dataloader.py\u001b[0m in \u001b[0;36m_next_data\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 559\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_next_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 560\u001b[0m \u001b[0mindex\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_next_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# may raise StopIteration\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 561\u001b[0;31m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_dataset_fetcher\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfetch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# may raise StopIteration\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 562\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_pin_memory\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 563\u001b[0m 
\u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_utils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpin_memory\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpin_memory\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/opt/conda/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py\u001b[0m in \u001b[0;36mfetch\u001b[0;34m(self, possibly_batched_index)\u001b[0m\n\u001b[1;32m 47\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mfetch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpossibly_batched_index\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 48\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mauto_collation\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 49\u001b[0;31m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdataset\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0midx\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0midx\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mpossibly_batched_index\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 50\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 51\u001b[0m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdataset\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mpossibly_batched_index\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/opt/conda/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py\u001b[0m in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 47\u001b[0m \u001b[0;32mdef\u001b[0m 
\u001b[0mfetch\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpossibly_batched_index\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 48\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mauto_collation\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 49\u001b[0;31m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdataset\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0midx\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0midx\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mpossibly_batched_index\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 50\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 51\u001b[0m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdataset\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mpossibly_batched_index\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.local/lib/python3.8/site-packages/datasets/arrow_dataset.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 1923\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__getitem__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# noqa: F811\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1924\u001b[0m \u001b[0;34m\"\"\"Can be used to index columns (by string names) or rows (by integer index or iterable of indices or bools).\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1925\u001b[0;31m return 
self._getitem(\n\u001b[0m\u001b[1;32m 1926\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1927\u001b[0m )\n", + "\u001b[0;32m~/.local/lib/python3.8/site-packages/datasets/arrow_dataset.py\u001b[0m in \u001b[0;36m_getitem\u001b[0;34m(self, key, decoded, **kwargs)\u001b[0m\n\u001b[1;32m 1908\u001b[0m \u001b[0mformatter\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mget_formatter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mformat_type\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfeatures\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfeatures\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdecoded\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdecoded\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mformat_kwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1909\u001b[0m \u001b[0mpa_subtable\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mquery_table\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_data\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindices\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_indices\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_indices\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1910\u001b[0;31m formatted_output = format_table(\n\u001b[0m\u001b[1;32m 1911\u001b[0m \u001b[0mpa_subtable\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mformatter\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mformatter\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mformat_columns\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mformat_columns\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0moutput_all_columns\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0moutput_all_columns\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1912\u001b[0m )\n", + "\u001b[0;32m~/.local/lib/python3.8/site-packages/datasets/formatting/formatting.py\u001b[0m in \u001b[0;36mformat_table\u001b[0;34m(table, key, formatter, format_columns, output_all_columns)\u001b[0m\n\u001b[1;32m 530\u001b[0m \u001b[0mpython_formatter\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mPythonFormatter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfeatures\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 531\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mformat_columns\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 532\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mformatter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpa_table\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mquery_type\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mquery_type\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 533\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mquery_type\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"column\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 534\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mkey\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mformat_columns\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.local/lib/python3.8/site-packages/datasets/formatting/formatting.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, pa_table, query_type)\u001b[0m\n\u001b[1;32m 279\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__call__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpa_table\u001b[0m\u001b[0;34m:\u001b[0m 
\u001b[0mpa\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTable\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mquery_type\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mUnion\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mRowFormat\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mColumnFormat\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mBatchFormat\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 280\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mquery_type\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"row\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 281\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat_row\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpa_table\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 282\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mquery_type\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"column\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 283\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat_column\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpa_table\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.local/lib/python3.8/site-packages/datasets/formatting/formatting.py\u001b[0m in \u001b[0;36mformat_row\u001b[0;34m(self, pa_table)\u001b[0m\n\u001b[1;32m 308\u001b[0m \u001b[0;32mclass\u001b[0m \u001b[0mPythonFormatter\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mFormatter\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mdict\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdict\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 309\u001b[0m 
\u001b[0;32mdef\u001b[0m \u001b[0mformat_row\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpa_table\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mpa\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTable\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mdict\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 310\u001b[0;31m \u001b[0mrow\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpython_arrow_extractor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mextract_row\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpa_table\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 311\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdecoded\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 312\u001b[0m \u001b[0mrow\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpython_features_decoder\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdecode_row\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrow\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.local/lib/python3.8/site-packages/datasets/formatting/formatting.py\u001b[0m in \u001b[0;36mextract_row\u001b[0;34m(self, pa_table)\u001b[0m\n\u001b[1;32m 138\u001b[0m \u001b[0;32mclass\u001b[0m \u001b[0mPythonArrowExtractor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mBaseArrowExtractor\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mdict\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdict\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 139\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mextract_row\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mpa_table\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mpa\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTable\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mdict\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 140\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_unnest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpa_table\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_pydict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 141\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 142\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mextract_column\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpa_table\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mpa\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mTable\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + ] + } + ], + "source": [ + "trainer.train()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6dad336a", + "metadata": {}, + "outputs": [], + "source": [ + "trainer.create_model_card()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ed1234c4", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f11836c9", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}