diff --git "a/wav2vec2/wav2vec2-xls-r-ukrainian-cv-10.ipynb" "b/wav2vec2/wav2vec2-xls-r-ukrainian-cv-10.ipynb" new file mode 100644--- /dev/null +++ "b/wav2vec2/wav2vec2-xls-r-ukrainian-cv-10.ipynb" @@ -0,0 +1,4969 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 829, + "status": "ok", + "timestamp": 1641588786523, + "user": { + "displayName": "Yurii Paniv", + "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64", + "userId": "13095662915325887123" + }, + "user_tz": -120 + }, + "id": "YELVqGxMxnbG", + "outputId": "876761c1-2e03-411b-e61b-07ac4ad61377" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fri Sep 2 01:31:23 2022 \n", + "+-----------------------------------------------------------------------------+\n", + "| NVIDIA-SMI 515.65.01 Driver Version: 515.65.01 CUDA Version: 11.7 |\n", + "|-------------------------------+----------------------+----------------------+\n", + "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n", + "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n", + "| | | MIG M. |\n", + "|===============================+======================+======================|\n", + "| 0 NVIDIA GeForce ... Off | 00000000:0A:00.0 On | N/A |\n", + "| 0% 35C P5 52W / 390W | 1231MiB / 24576MiB | 34% Default |\n", + "| | | N/A |\n", + "+-------------------------------+----------------------+----------------------+\n", + " \n", + "+-----------------------------------------------------------------------------+\n", + "| Processes: |\n", + "| GPU GI CI PID Type Process name GPU Memory |\n", + "| ID ID Usage |\n", + "|=============================================================================|\n", + "| 0 N/A N/A 1216 G /usr/lib/xorg/Xorg 485MiB |\n", + "| 0 N/A N/A 1601 G /usr/bin/kwin_x11 97MiB |\n", + "| 0 N/A N/A 1650 G /usr/bin/plasmashell 64MiB |\n", + "| 0 N/A N/A 1747 G telegram-desktop 4MiB |\n", + "| 0 N/A N/A 4701 G ...5/usr/lib/firefox/firefox 175MiB |\n", + "| 0 N/A N/A 804722 G ...RendererForSitePerProcess 363MiB |\n", + "| 0 N/A N/A 867357 G ...996071496053229024,131072 35MiB |\n", + "+-----------------------------------------------------------------------------+\n" + ] + } + ], + "source": [ + "gpu_info = !nvidia-smi\n", + "gpu_info = '\\n'.join(gpu_info)\n", + "if gpu_info.find('failed') >= 0:\n", + " print('Not connected to a GPU')\n", + "else:\n", + " print(gpu_info)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "c8eh87Hoee5d" + }, + "outputs": [], + "source": [ + "#%%capture\n", + "#!pip install datasets==1.13.3\n", + "#!pip install transformers==4.11.3\n", + "#!pip install huggingface_hub==0.1\n", + "#!pip install torchaudio==0.10.0+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html\n", + "#!pip install jiwer" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 5334, + "status": "ok", + "timestamp": 1641588811766, + "user": { + "displayName": "Yurii Paniv", + "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64", + "userId": "13095662915325887123" + }, + "user_tz": -120 + }, + "id": "2MMXcWFFgCXU", + "outputId": "be9fd72e-4395-4cd0-ff87-631dad046e71" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Reusing dataset common_voice_10_0 (/home/robinhad/.cache/huggingface/datasets/mozilla-foundation___common_voice_10_0/uk/10.0.0/27df768ab1b5cac48a7616f145b79b62599167b0ffa2e054bf4c3e74e9619e5e)\n", + "Reusing dataset common_voice_10_0 (/home/robinhad/.cache/huggingface/datasets/mozilla-foundation___common_voice_10_0/uk/10.0.0/27df768ab1b5cac48a7616f145b79b62599167b0ffa2e054bf4c3e74e9619e5e)\n" + ] + } + ], + "source": [ + "from datasets import load_dataset, load_metric, Audio\n", + "\n", + "common_voice_train = load_dataset(\"mozilla-foundation/common_voice_10_0\", \"uk\", split=\"train\", use_auth_token=True)\n", + "common_voice_test = load_dataset(\"mozilla-foundation/common_voice_10_0\", \"uk\", split=\"test\", use_auth_token=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dataset({\n", + " features: ['client_id', 'path', 'audio', 'sentence', 'up_votes', 'down_votes', 'age', 'gender', 'accent', 'locale', 'segment'],\n", + " num_rows: 11463\n", + "})" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "common_voice_train" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "common_voice_train.cleanup_cache_files()\n", + "common_voice_test.cleanup_cache_files()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "id": "kbyq6lDgQc2a" + }, + "outputs": [], + "source": [ + "common_voice_train = common_voice_train.remove_columns([\"accent\", \"age\", \"client_id\", \"down_votes\", \"gender\", \"locale\", \"segment\", \"up_votes\"])\n", + "common_voice_test = common_voice_test.remove_columns([\"accent\", \"age\", \"client_id\", \"down_votes\", \"gender\", \"locale\", \"segment\", \"up_votes\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "72737oog2F6U" + }, + "outputs": [], + "source": [ + "from datasets import ClassLabel\n", + "import random\n", + "import pandas as pd\n", + "from IPython.display import display, HTML\n", + "\n", + "def show_random_elements(dataset, num_examples=10):\n", + " assert num_examples <= len(dataset), \"Can't pick more elements than there are in the dataset.\"\n", + " picks = []\n", + " for _ in range(num_examples):\n", + " pick = random.randint(0, len(dataset)-1)\n", + " while pick in picks:\n", + " pick = random.randint(0, len(dataset)-1)\n", + " picks.append(pick)\n", + " \n", + " df = pd.DataFrame(dataset[picks])\n", + " display(HTML(df.to_html()))" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "executionInfo": { + "elapsed": 39, + "status": "ok", + "timestamp": 1641588811771, + "user": { + "displayName": "Yurii Paniv", + "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64", + "userId": "13095662915325887123" + }, + "user_tz": -120 + }, + "id": "K_JUmf3G3b9S", + "outputId": "8603c909-09e1-43ae-f7c2-b27b25d795a3" + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sentence
0Вона нас не лякає.
1Бейнбрідж затримався, готуючи екіпажі, й фактично не встиг узяти участі у війні.
2А тепер.
3Наші \"будьонівці\", ніби з цікавості, зібралися й оточили червоні шеренги.
4Серед квітів я вмирав, Серед хмар я воскресав.
5Сьогодні виробництво полімерів найбільша галузь хімічної промисловості.
6Хмельницький заплатив за все на цілий рік наперед.
7Соловій же залишився підпалити бікфордів шнур.
8Тоді його слово буде хвилювати, захоплювати, піднімати людську душу.
9Тут були яблуні, сливи, вишні, — вишень найбільше.
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "show_random_elements(common_voice_train.remove_columns([\"path\", \"audio\"]), num_examples=10)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 30, + "status": "ok", + "timestamp": 1641588811775, + "user": { + "displayName": "Yurii Paniv", + "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64", + "userId": "13095662915325887123" + }, + "user_tz": -120 + }, + "id": "XIHocAuTQbBR", + "outputId": "e8392853-e0d1-45ba-df74-065c50565654" + }, + "outputs": [ + { + "data": { + "application/json": { + "ascii": false, + "bar_format": null, + "colour": null, + "elapsed": 0.00995326042175293, + "initial": 0, + "n": 0, + "ncols": null, + "nrows": null, + "postfix": null, + "prefix": "", + "rate": null, + "total": 12, + "unit": "ba", + "unit_divisor": 1000, + "unit_scale": false + }, + "application/vnd.jupyter.widget-view+json": { + "model_id": "4c0c81459dfb4ede8f0ec6fe25a0807e", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/12 [00:00\n", + " \n", + " \n", + " \n", + " sentence\n", + " \n", + " \n", + " \n", + " \n", + " 0\n", + " так так усе на світі кінчається\n", + " \n", + " \n", + " 1\n", + " комуністів тільки й є що воєнком та два ротні політруки\n", + " \n", + " \n", + " 2\n", + " уже й убитих чимало\n", + " \n", + " \n", + " 3\n", + " трупів не закопували\n", + " \n", + " \n", + " 4\n", + " до фастова дісталася з якимось польським обозом\n", + " \n", + " \n", + " 5\n", + " невже то ви були\n", + " \n", + " \n", + " 6\n", + " при отій купці отої нещасної духовної братії\n", + " \n", + " \n", + " 7\n", + " вирішуємо напасти на бригаду по дорозі не допустивши до села\n", + " \n", + " \n", + " 8\n", + " де то хто таке видав аби хлопи купували панські маєтки\n", + " \n", + " \n", + " 9\n", + " коні пішли з коноводами в балку\n", + " \n", + " \n", + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "show_random_elements(common_voice_train.map(cleaner).remove_columns([\"path\", \"audio\"]), num_examples=10)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading cached processed dataset at /home/robinhad/.cache/huggingface/datasets/mozilla-foundation___common_voice_10_0/uk/10.0.0/27df768ab1b5cac48a7616f145b79b62599167b0ffa2e054bf4c3e74e9619e5e/cache-890587fbc5f83609.arrow\n" + ] + }, + { + "data": { + "application/json": { + "ascii": false, + "bar_format": null, + "colour": null, + "elapsed": 0.007320880889892578, + "initial": 0, + "n": 0, + "ncols": null, + "nrows": null, + "postfix": null, + "prefix": "", + "rate": null, + "total": 6783, + "unit": "ex", + "unit_divisor": 1000, + "unit_scale": false + }, + "application/vnd.jupyter.widget-view+json": { + "model_id": "8261bf1a7bd747fb88f7e063c24273d4", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/6783 [00:00\n", + " \n", + " \n", + " \n", + " sentence\n", + " \n", + " \n", + " \n", + " \n", + " 0\n", + " я замилувався маневруванням тачанок на полях\n", + " \n", + " \n", + " 1\n", + " андрій із бугаєм вилізли на близький горб роздивилися\n", + " \n", + " \n", + " 2\n", + " вона нам потрібна як щоденний хліб\n", + " \n", + " \n", + " 3\n", + " олесеві ще більше захотілось чаю\n", + " \n", + " \n", + " 4\n", + " вирішуємо тут поснідати і з годину відпочити бо люди й коні потомлені\n", + " \n", + " \n", + " 5\n", + " воротилову сотню найдужче боявся він дрібного дощу який почав сіятись удосвіта й міг зашкодити\n", + " \n", + " \n", + " 6\n", + " люта злість піднялася в душі хлопця при вигляді оцього свого тирана\n", + " \n", + " \n", + " 7\n", + " також цього року в столиці виникла низка профспілкових організацій і був створений робочий клуб\n", + " \n", + " \n", + " 8\n", + " накидав того літа а вони в наших плавнях затрималися всю січ мені засмерділи\n", + " \n", + " \n", + " 9\n", + " їздять коло нас\n", + " \n", + " \n", + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "show_random_elements(common_voice_train.remove_columns([\"path\",\"audio\"]))" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "LwCshNbbeRZR" + }, + "outputs": [], + "source": [ + "def extract_all_chars(batch):\n", + " all_text = \" \".join(batch[\"sentence\"])\n", + " vocab = list(set(all_text))\n", + " return {\"vocab\": [vocab], \"all_text\": [all_text]}" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 81, + "referenced_widgets": [ + "116786d9364a4a57b521cddaabeda688", + "9baa2f69aa9c4387bf1086a04ed78420", + "a1e2c04dc2cb45ea80bec125e3dbf56f", + "b6d46d40efa14b21814f41531f5a2f41", + "d8bf8dc5d6c84140a4e96c9c435b8f17", + "04ec68b059df4c628839c3ac29e2ebdd", + "427056895c674c428400bee0f5b43995", + "d518f2c2ab6945b78a6d336dad6262bd", + "77f1a51099b24831ad8b2be3d2dc833a", + "5815ae1348994bfebba4a8e968489a96", + "22ba979142074f1d976e1a905544fd2d", + "8b6b7f28751c45c8869aa86eb2a0ab26", + "445c84e1e2e541f2a54fb989def386ae", + "68502fb433564eee8dfdf272ed7e4f56", + "1f3abdf2e0f6459da4179a94d691c4c4", + "48c60be3ca9349a295b83f65769c7f27", + "6c80bd8a8fe14a5989fe27445c14650f", + "5c2a7fea8c434d51ada69a0854b88baf", + "414efa8a08cd491cb78af8a95a151daa", + "c31a747e18df4b4aa4449a30e387448c", + "3dedffa30b774426bd474072a3a0d591", + "05d8496d54174ae298c319b0194fc710" + ] + }, + "executionInfo": { + "elapsed": 560, + "status": "ok", + "timestamp": 1641588812313, + "user": { + "displayName": "Yurii Paniv", + "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64", + "userId": "13095662915325887123" + }, + "user_tz": -120 + }, + "id": "_m6uUjjcfbjH", + "outputId": "4cc94e18-9295-4414-c611-c98916fe3d4d" + }, + "outputs": [ + { + "data": { + "application/json": { + "ascii": false, + "bar_format": null, + "colour": null, + "elapsed": 0.009069681167602539, + "initial": 0, + "n": 0, + "ncols": null, + "nrows": null, + "postfix": null, + "prefix": "", + "rate": null, + "total": 1, + "unit": "ba", + "unit_divisor": 1000, + "unit_scale": false + }, + "application/vnd.jupyter.widget-view+json": { + "model_id": "21abcee2f4f6401096ebfcc2b283f704", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/1 [00:00\n", + " \n", + " Your browser does not support the audio element.\n", + " \n", + " " + ], + "text/plain": [ + "" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import IPython.display as ipd\n", + "import numpy as np\n", + "import random\n", + "\n", + "rand_int = random.randint(0, len(common_voice_train)-1)\n", + "\n", + "print(common_voice_train[rand_int][\"sentence\"])\n", + "ipd.Audio(data=common_voice_train[rand_int][\"audio\"][\"array\"], autoplay=True, rate=16000)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 22, + "status": "ok", + "timestamp": 1641588821176, + "user": { + "displayName": "Yurii Paniv", + "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64", + "userId": "13095662915325887123" + }, + "user_tz": -120 + }, + "id": "1Po2g7YPuRTx", + "outputId": "ad79ec8a-ab5a-4c52-edfa-a20d0eec2282" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Target text: от їхало якихось із десять на конях назустріч\n", + "Input array shape: (73152,)\n", + "Sampling rate: 16000\n" + ] + } + ], + "source": [ + "rand_int = random.randint(0, len(common_voice_train)-1)\n", + "\n", + "print(\"Target text:\", common_voice_train[rand_int][\"sentence\"])\n", + "print(\"Input array shape:\", common_voice_train[rand_int][\"audio\"][\"array\"].shape)\n", + "print(\"Sampling rate:\", common_voice_train[rand_int][\"audio\"][\"sampling_rate\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "id": "eJY7I0XAwe9p" + }, + "outputs": [], + "source": [ + "def prepare_dataset(batch):\n", + " audio = batch[\"audio\"]\n", + "\n", + " # batched output is \"un-batched\"\n", + " batch[\"input_values\"] = processor(audio[\"array\"], sampling_rate=audio[\"sampling_rate\"]).input_values[0]\n", + " batch[\"input_length\"] = len(batch[\"input_values\"])\n", + " \n", + " with processor.as_target_processor():\n", + " batch[\"labels\"] = processor(batch[\"sentence\"]).input_ids\n", + " return batch" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 87, + "referenced_widgets": [ + "a29f88f174f8499082fbb36a36c47fa4", + "efc3bc0c48124ebeb79d245216eaf0fe", + "d45747150d0b434593a3a7c98399599a", + "ea73f7deb1c643f7b81de7fb7acaaf1b", + "18bc63944343440f837cdff76db004fc", + "9c875952cdd649a5bab87de9bb3f5200", + "aa329cb93df44a6da6012c7cc49d7489", + "b39b6e9131ca4ce3b31e84ceb04e1b83", + "c5eed102ef134a4e8ca41713b82ff6a4", + "e6e50da6516847878309fdc5c463edb3", + "a4ae510b4f3845f891a796cf844fc2bb" + ] + }, + "executionInfo": { + "elapsed": 107521, + "status": "ok", + "timestamp": 1641588928679, + "user": { + "displayName": "Yurii Paniv", + "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64", + "userId": "13095662915325887123" + }, + "user_tz": -120 + }, + "id": "-np9xYK-wl8q", + "outputId": "779b4637-0606-4cc8-be3c-16c1c4241e63" + }, + "outputs": [ + { + "data": { + "application/json": { + "ascii": false, + "bar_format": null, + "colour": null, + "elapsed": 0.00739741325378418, + "initial": 0, + "n": 0, + "ncols": null, + "nrows": null, + "postfix": null, + "prefix": "", + "rate": null, + "total": 11463, + "unit": "ex", + "unit_divisor": 1000, + "unit_scale": false + }, + "application/vnd.jupyter.widget-view+json": { + "model_id": "c51a283b0cf149d7a84ade53f6eb40d9", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/11463 [00:00 Dict[str, torch.Tensor]:\n", + " # split inputs and labels since they have to be of different lenghts and need\n", + " # different padding methods\n", + " input_features = [{\"input_values\": feature[\"input_values\"]} for feature in features]\n", + " label_features = [{\"input_ids\": feature[\"labels\"]} for feature in features]\n", + "\n", + " batch = self.processor.pad(\n", + " input_features,\n", + " padding=self.padding,\n", + " return_tensors=\"pt\",\n", + " )\n", + " with self.processor.as_target_processor():\n", + " labels_batch = self.processor.pad(\n", + " label_features,\n", + " padding=self.padding,\n", + " return_tensors=\"pt\",\n", + " )\n", + "\n", + " # replace padding with -100 to ignore loss correctly\n", + " labels = labels_batch[\"input_ids\"].masked_fill(labels_batch.attention_mask.ne(1), -100)\n", + "\n", + " batch[\"labels\"] = labels\n", + "\n", + " return batch" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "id": "lbQf5GuZyQ4_" + }, + "outputs": [], + "source": [ + "data_collator = DataCollatorCTCWithPadding(processor=processor, padding=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "id": "9Xsux2gmyXso" + }, + "outputs": [], + "source": [ + "wer_metric = load_metric(\"wer\")\n", + "cer_metric = load_metric(\"cer\")\n", + "metrics = [wer_metric, cer_metric]" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "id": "1XZ-kjweyTy_" + }, + "outputs": [], + "source": [ + "def compute_metrics(pred):\n", + " pred_logits = pred.predictions\n", + " pred_ids = np.argmax(pred_logits, axis=-1)\n", + "\n", + " pred.label_ids[pred.label_ids == -100] = processor.tokenizer.pad_token_id\n", + "\n", + " pred_str = processor.batch_decode(pred_ids)\n", + " # we do not want to group tokens when computing the metrics\n", + " label_str = processor.batch_decode(pred.label_ids, group_tokens=False)\n", + "\n", + " wer = wer_metric.compute(predictions=pred_str, references=label_str)\n", + " cer = cer_metric.compute(predictions=pred_str, references=label_str)\n", + "\n", + " return {\"wer\": wer, \"cer\": cer}" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 9496, + "status": "ok", + "timestamp": 1641588938616, + "user": { + "displayName": "Yurii Paniv", + "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64", + "userId": "13095662915325887123" + }, + "user_tz": -120 + }, + "id": "e7cqAWIayn6w", + "outputId": "b7b20ce9-e1b2-473f-8032-2a75f98dfa9e" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Some weights of the model checkpoint at facebook/wav2vec2-xls-r-300m were not used when initializing Wav2Vec2ForCTC: ['quantizer.codevectors', 'project_hid.bias', 'project_q.weight', 'quantizer.weight_proj.weight', 'quantizer.weight_proj.bias', 'project_q.bias', 'project_hid.weight']\n", + "- This IS expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", + "- This IS NOT expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", + "Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-300m and are newly initialized: ['lm_head.bias', 'lm_head.weight']\n", + "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" + ] + } + ], + "source": [ + "from transformers import Wav2Vec2ForCTC\n", + "\n", + "model = Wav2Vec2ForCTC.from_pretrained(\n", + " \"facebook/wav2vec2-xls-r-300m\", \n", + " attention_dropout=0.3,\n", + " hidden_dropout=0.3,\n", + " feat_proj_dropout=0.3,\n", + " mask_time_prob=0.05,\n", + " layerdrop=0.3,\n", + " ctc_loss_reduction=\"mean\", \n", + " pad_token_id=processor.tokenizer.pad_token_id,\n", + " vocab_size=len(processor.tokenizer),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "id": "oGI8zObtZ3V0" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/robinhad/Projects/Speech/wav2vec2-xls-r-ukrainian/.venv/lib/python3.9/site-packages/transformers/models/wav2vec2/modeling_wav2vec2.py:1618: FutureWarning: The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5.Please use the equivalent `freeze_feature_encoder` method instead.\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "model.freeze_feature_extractor()" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": { + "id": "KbeKSV7uzGPP" + }, + "outputs": [], + "source": [ + "from transformers import TrainingArguments\n", + "\n", + "training_args = TrainingArguments(\n", + " output_dir=repo_name,\n", + " group_by_length=True,\n", + " per_device_train_batch_size=16,\n", + " gradient_accumulation_steps=6,\n", + " eval_accumulation_steps=4,\n", + " evaluation_strategy=\"steps\",\n", + " num_train_epochs=100,\n", + " gradient_checkpointing=True,\n", + " fp16=True,\n", + " save_steps=400,\n", + " eval_steps=400,\n", + " logging_steps=400,\n", + " learning_rate=3e-4,\n", + " warmup_steps=500,\n", + " save_total_limit=2,\n", + " report_to=\"tensorboard\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "executionInfo": { + "elapsed": 11063, + "status": "ok", + "timestamp": 1641588949674, + "user": { + "displayName": "Yurii Paniv", + "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64", + "userId": "13095662915325887123" + }, + "user_tz": -120 + }, + "id": "rY7vBmFCPFgC", + "outputId": "2e89d5ea-5b25-44bf-8492-a6220b0b1c38" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using cuda_amp half precision backend\n" + ] + } + ], + "source": [ + "from transformers import Trainer\n", + "\n", + "trainer = Trainer(\n", + " model=model,\n", + " data_collator=data_collator,\n", + " args=training_args,\n", + " compute_metrics=compute_metrics,\n", + " train_dataset=common_voice_train,\n", + " eval_dataset=common_voice_test,\n", + " tokenizer=processor.feature_extractor,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 409 + }, + "id": "9fRr9TG5pGBl", + "outputId": "c2a7c797-326c-4bd2-b167-9d2f41d77def" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "The following columns in the training set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n", + "/home/robinhad/Projects/Speech/wav2vec2-xls-r-ukrainian/.venv/lib/python3.9/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n", + " warnings.warn(\n", + "***** Running training *****\n", + " Num examples = 11463\n", + " Num Epochs = 100\n", + " Instantaneous batch size per device = 16\n", + " Total train batch size (w. parallel, distributed & accumulation) = 96\n", + " Gradient Accumulation steps = 6\n", + " Total optimization steps = 11900\n" + ] + }, + { + "data": { + "application/json": { + "ascii": false, + "bar_format": null, + "colour": null, + "elapsed": 0.007272958755493164, + "initial": 0, + "n": 0, + "ncols": null, + "nrows": null, + "postfix": null, + "prefix": "", + "rate": null, + "total": 11900, + "unit": "it", + "unit_divisor": 1000, + "unit_scale": false + }, + "application/vnd.jupyter.widget-view+json": { + "model_id": "deebda57b25f4f95b4915d7a8d479a62", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/11900 [00:00 to the vocabulary\n", + "Adding to the vocabulary\n" + ] + } + ], + "source": [ + "from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor\n", + "model = Wav2Vec2ForCTC.from_pretrained(repo_name + \"/checkpoint-11200\").to(\"cuda\")\n", + "processor = Wav2Vec2Processor.from_pretrained(repo_name)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jD7TZ1YS3S_K" + }, + "source": [ + "\n", + "Now, we will just take the first example of the test set, run it through the model and take the `argmax(...)` of the logits to retrieve the predicted token ids." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pax07TnL3WZn" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "It is strongly recommended to pass the ``sampling_rate`` argument to this function. Failing to do so can result in silent errors that might be hard to debug.\n" + ] + } + ], + "source": [ + "audio_id = 10\n", + "\n", + "input_dict = processor(common_voice_test[\"input_values\"], return_tensors=\"pt\", padding=True)\n", + "\n", + "logits = model(input_dict.input_values.to(\"cuda\")).logits\n", + "\n", + "pred_ids = torch.argmax(logits, dim=-1)[audio_id]\n", + "\n", + "common_voice_test_transcription = load_dataset(\"common_voice\", \"uk\", split=\"test\")\n", + "\n", + "print(\"Prediction:\")\n", + "print(processor.decode(pred_ids))\n", + "\n", + "print(\"\\nReference:\")\n", + "print(common_voice_test_transcription[audio_id][\"sentence\"].lower())" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "machine_shape": "hm", + "name": "Копія записника \"Fine-Tune XLS-R on Common Voice.ipynb\"", + "provenance": [ + { + "file_id": "https://github.com/patrickvonplaten/notebooks/blob/master/Fine_Tune_XLS_R_on_Common_Voice.ipynb", + "timestamp": 1641583715050 + } + ] + }, + "kernelspec": { + "display_name": "Python 3.9.13 (conda)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + }, + "vscode": { + "interpreter": { + "hash": "a5cdd9abf8df3af0fd61fdb3838d6c6f2f66a9ba4bf4484f45cd88abf9f04fe9" + } + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "04ec68b059df4c628839c3ac29e2ebdd": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "05d8496d54174ae298c319b0194fc710": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "116786d9364a4a57b521cddaabeda688": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_a1e2c04dc2cb45ea80bec125e3dbf56f", + "IPY_MODEL_b6d46d40efa14b21814f41531f5a2f41", + "IPY_MODEL_d8bf8dc5d6c84140a4e96c9c435b8f17" + ], + "layout": "IPY_MODEL_9baa2f69aa9c4387bf1086a04ed78420" + } + }, + "18bc63944343440f837cdff76db004fc": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a4ae510b4f3845f891a796cf844fc2bb", + "placeholder": "​", + "style": "IPY_MODEL_e6e50da6516847878309fdc5c463edb3", + "value": " 6962/6962 [01:46<00:00, 78.15ex/s]" + } + }, + "1f3abdf2e0f6459da4179a94d691c4c4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c31a747e18df4b4aa4449a30e387448c", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_414efa8a08cd491cb78af8a95a151daa", + "value": 1 + } + }, + "22ba979142074f1d976e1a905544fd2d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3dedffa30b774426bd474072a3a0d591": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "414efa8a08cd491cb78af8a95a151daa": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "427056895c674c428400bee0f5b43995": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "445c84e1e2e541f2a54fb989def386ae": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "48c60be3ca9349a295b83f65769c7f27": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_05d8496d54174ae298c319b0194fc710", + "placeholder": "​", + "style": "IPY_MODEL_3dedffa30b774426bd474072a3a0d591", + "value": " 1/1 [00:00<00:00, 11.09ba/s]" + } + }, + "5815ae1348994bfebba4a8e968489a96": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "5c2a7fea8c434d51ada69a0854b88baf": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "68502fb433564eee8dfdf272ed7e4f56": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5c2a7fea8c434d51ada69a0854b88baf", + "placeholder": "​", + "style": "IPY_MODEL_6c80bd8a8fe14a5989fe27445c14650f", + "value": "100%" + } + }, + "6c80bd8a8fe14a5989fe27445c14650f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "77f1a51099b24831ad8b2be3d2dc833a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8b6b7f28751c45c8869aa86eb2a0ab26": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_68502fb433564eee8dfdf272ed7e4f56", + "IPY_MODEL_1f3abdf2e0f6459da4179a94d691c4c4", + "IPY_MODEL_48c60be3ca9349a295b83f65769c7f27" + ], + "layout": "IPY_MODEL_445c84e1e2e541f2a54fb989def386ae" + } + }, + "9baa2f69aa9c4387bf1086a04ed78420": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9c875952cdd649a5bab87de9bb3f5200": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "a1e2c04dc2cb45ea80bec125e3dbf56f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_427056895c674c428400bee0f5b43995", + "placeholder": "​", + "style": "IPY_MODEL_04ec68b059df4c628839c3ac29e2ebdd", + "value": "100%" + } + }, + "a29f88f174f8499082fbb36a36c47fa4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_d45747150d0b434593a3a7c98399599a", + "IPY_MODEL_ea73f7deb1c643f7b81de7fb7acaaf1b", + "IPY_MODEL_18bc63944343440f837cdff76db004fc" + ], + "layout": "IPY_MODEL_efc3bc0c48124ebeb79d245216eaf0fe" + } + }, + "a4ae510b4f3845f891a796cf844fc2bb": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "aa329cb93df44a6da6012c7cc49d7489": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b39b6e9131ca4ce3b31e84ceb04e1b83": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "b6d46d40efa14b21814f41531f5a2f41": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_77f1a51099b24831ad8b2be3d2dc833a", + "max": 1, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_d518f2c2ab6945b78a6d336dad6262bd", + "value": 1 + } + }, + "c31a747e18df4b4aa4449a30e387448c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c5eed102ef134a4e8ca41713b82ff6a4": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d45747150d0b434593a3a7c98399599a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_aa329cb93df44a6da6012c7cc49d7489", + "placeholder": "​", + "style": "IPY_MODEL_9c875952cdd649a5bab87de9bb3f5200", + "value": "100%" + } + }, + "d518f2c2ab6945b78a6d336dad6262bd": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "d8bf8dc5d6c84140a4e96c9c435b8f17": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_22ba979142074f1d976e1a905544fd2d", + "placeholder": "​", + "style": "IPY_MODEL_5815ae1348994bfebba4a8e968489a96", + "value": " 1/1 [00:00<00:00, 7.95ba/s]" + } + }, + "e6e50da6516847878309fdc5c463edb3": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ea73f7deb1c643f7b81de7fb7acaaf1b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c5eed102ef134a4e8ca41713b82ff6a4", + "max": 6962, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_b39b6e9131ca4ce3b31e84ceb04e1b83", + "value": 6962 + } + }, + "efc3bc0c48124ebeb79d245216eaf0fe": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}