{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "executionInfo": { "elapsed": 829, "status": "ok", "timestamp": 1641588786523, "user": { "displayName": "Yurii Paniv", "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64", "userId": "13095662915325887123" }, "user_tz": -120 }, "id": "YELVqGxMxnbG", "outputId": "876761c1-2e03-411b-e61b-07ac4ad61377" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Fri Sep 2 01:31:23 2022 \n", "+-----------------------------------------------------------------------------+\n", "| NVIDIA-SMI 515.65.01 Driver Version: 515.65.01 CUDA Version: 11.7 |\n", "|-------------------------------+----------------------+----------------------+\n", "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n", "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n", "| | | MIG M. |\n", "|===============================+======================+======================|\n", "| 0 NVIDIA GeForce ... Off | 00000000:0A:00.0 On | N/A |\n", "| 0% 35C P5 52W / 390W | 1231MiB / 24576MiB | 34% Default |\n", "| | | N/A |\n", "+-------------------------------+----------------------+----------------------+\n", " \n", "+-----------------------------------------------------------------------------+\n", "| Processes: |\n", "| GPU GI CI PID Type Process name GPU Memory |\n", "| ID ID Usage |\n", "|=============================================================================|\n", "| 0 N/A N/A 1216 G /usr/lib/xorg/Xorg 485MiB |\n", "| 0 N/A N/A 1601 G /usr/bin/kwin_x11 97MiB |\n", "| 0 N/A N/A 1650 G /usr/bin/plasmashell 64MiB |\n", "| 0 N/A N/A 1747 G telegram-desktop 4MiB |\n", "| 0 N/A N/A 4701 G ...5/usr/lib/firefox/firefox 175MiB |\n", "| 0 N/A N/A 804722 G ...RendererForSitePerProcess 363MiB |\n", "| 0 N/A N/A 867357 G ...996071496053229024,131072 35MiB |\n", "+-----------------------------------------------------------------------------+\n" ] } ], "source": [ "gpu_info = !nvidia-smi\n", "gpu_info = '\\n'.join(gpu_info)\n", "if gpu_info.find('failed') >= 0:\n", " print('Not connected to a GPU')\n", "else:\n", " print(gpu_info)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "id": "c8eh87Hoee5d" }, "outputs": [], "source": [ "#%%capture\n", "#!pip install datasets==1.13.3\n", "#!pip install transformers==4.11.3\n", "#!pip install huggingface_hub==0.1\n", "#!pip install torchaudio==0.10.0+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html\n", "#!pip install jiwer" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "executionInfo": { "elapsed": 5334, "status": "ok", "timestamp": 1641588811766, "user": { "displayName": "Yurii Paniv", "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64", "userId": "13095662915325887123" }, "user_tz": -120 }, "id": "2MMXcWFFgCXU", "outputId": "be9fd72e-4395-4cd0-ff87-631dad046e71" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Reusing dataset common_voice_10_0 (/home/robinhad/.cache/huggingface/datasets/mozilla-foundation___common_voice_10_0/uk/10.0.0/27df768ab1b5cac48a7616f145b79b62599167b0ffa2e054bf4c3e74e9619e5e)\n", "Reusing dataset common_voice_10_0 (/home/robinhad/.cache/huggingface/datasets/mozilla-foundation___common_voice_10_0/uk/10.0.0/27df768ab1b5cac48a7616f145b79b62599167b0ffa2e054bf4c3e74e9619e5e)\n" ] } ], "source": [ "from datasets import load_dataset, load_metric, Audio\n", "\n", "common_voice_train = load_dataset(\"mozilla-foundation/common_voice_10_0\", \"uk\", split=\"train\", use_auth_token=True)\n", "common_voice_test = load_dataset(\"mozilla-foundation/common_voice_10_0\", \"uk\", split=\"test\", use_auth_token=True)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Dataset({\n", " features: ['client_id', 'path', 'audio', 'sentence', 'up_votes', 'down_votes', 'age', 'gender', 'accent', 'locale', 'segment'],\n", " num_rows: 11463\n", "})" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "common_voice_train" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "common_voice_train.cleanup_cache_files()\n", "common_voice_test.cleanup_cache_files()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "id": "kbyq6lDgQc2a" }, "outputs": [], "source": [ "common_voice_train = common_voice_train.remove_columns([\"accent\", \"age\", \"client_id\", \"down_votes\", \"gender\", \"locale\", \"segment\", \"up_votes\"])\n", "common_voice_test = common_voice_test.remove_columns([\"accent\", \"age\", \"client_id\", \"down_votes\", \"gender\", \"locale\", \"segment\", \"up_votes\"])" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "id": "72737oog2F6U" }, "outputs": [], "source": [ "from datasets import ClassLabel\n", "import random\n", "import pandas as pd\n", "from IPython.display import display, HTML\n", "\n", "def show_random_elements(dataset, num_examples=10):\n", " assert num_examples <= len(dataset), \"Can't pick more elements than there are in the dataset.\"\n", " picks = []\n", " for _ in range(num_examples):\n", " pick = random.randint(0, len(dataset)-1)\n", " while pick in picks:\n", " pick = random.randint(0, len(dataset)-1)\n", " picks.append(pick)\n", " \n", " df = pd.DataFrame(dataset[picks])\n", " display(HTML(df.to_html()))" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 363 }, "executionInfo": { "elapsed": 39, "status": "ok", "timestamp": 1641588811771, "user": { "displayName": "Yurii Paniv", "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64", "userId": "13095662915325887123" }, "user_tz": -120 }, "id": "K_JUmf3G3b9S", "outputId": "8603c909-09e1-43ae-f7c2-b27b25d795a3" }, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sentence
0Вона нас не лякає.
1Бейнбрідж затримався, готуючи екіпажі, й фактично не встиг узяти участі у війні.
2А тепер.
3Наші \"будьонівці\", ніби з цікавості, зібралися й оточили червоні шеренги.
4Серед квітів я вмирав, Серед хмар я воскресав.
5Сьогодні виробництво полімерів найбільша галузь хімічної промисловості.
6Хмельницький заплатив за все на цілий рік наперед.
7Соловій же залишився підпалити бікфордів шнур.
8Тоді його слово буде хвилювати, захоплювати, піднімати людську душу.
9Тут були яблуні, сливи, вишні, — вишень найбільше.
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "show_random_elements(common_voice_train.remove_columns([\"path\", \"audio\"]), num_examples=10)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "executionInfo": { "elapsed": 30, "status": "ok", "timestamp": 1641588811775, "user": { "displayName": "Yurii Paniv", "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64", "userId": "13095662915325887123" }, "user_tz": -120 }, "id": "XIHocAuTQbBR", "outputId": "e8392853-e0d1-45ba-df74-065c50565654" }, "outputs": [ { "data": { "application/json": { "ascii": false, "bar_format": null, "colour": null, "elapsed": 0.00995326042175293, "initial": 0, "n": 0, "ncols": null, "nrows": null, "postfix": null, "prefix": "", "rate": null, "total": 12, "unit": "ba", "unit_divisor": 1000, "unit_scale": false }, "application/vnd.jupyter.widget-view+json": { "model_id": "4c0c81459dfb4ede8f0ec6fe25a0807e", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/12 [00:00\n", " \n", " \n", " \n", " sentence\n", " \n", " \n", " \n", " \n", " 0\n", " так так усе на світі кінчається\n", " \n", " \n", " 1\n", " комуністів тільки й є що воєнком та два ротні політруки\n", " \n", " \n", " 2\n", " уже й убитих чимало\n", " \n", " \n", " 3\n", " трупів не закопували\n", " \n", " \n", " 4\n", " до фастова дісталася з якимось польським обозом\n", " \n", " \n", " 5\n", " невже то ви були\n", " \n", " \n", " 6\n", " при отій купці отої нещасної духовної братії\n", " \n", " \n", " 7\n", " вирішуємо напасти на бригаду по дорозі не допустивши до села\n", " \n", " \n", " 8\n", " де то хто таке видав аби хлопи купували панські маєтки\n", " \n", " \n", " 9\n", " коні пішли з коноводами в балку\n", " \n", " \n", "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "show_random_elements(common_voice_train.map(cleaner).remove_columns([\"path\", \"audio\"]), num_examples=10)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Loading cached processed dataset at /home/robinhad/.cache/huggingface/datasets/mozilla-foundation___common_voice_10_0/uk/10.0.0/27df768ab1b5cac48a7616f145b79b62599167b0ffa2e054bf4c3e74e9619e5e/cache-890587fbc5f83609.arrow\n" ] }, { "data": { "application/json": { "ascii": false, "bar_format": null, "colour": null, "elapsed": 0.007320880889892578, "initial": 0, "n": 0, "ncols": null, "nrows": null, "postfix": null, "prefix": "", "rate": null, "total": 6783, "unit": "ex", "unit_divisor": 1000, "unit_scale": false }, "application/vnd.jupyter.widget-view+json": { "model_id": "8261bf1a7bd747fb88f7e063c24273d4", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/6783 [00:00\n", " \n", " \n", " \n", " sentence\n", " \n", " \n", " \n", " \n", " 0\n", " я замилувався маневруванням тачанок на полях\n", " \n", " \n", " 1\n", " андрій із бугаєм вилізли на близький горб роздивилися\n", " \n", " \n", " 2\n", " вона нам потрібна як щоденний хліб\n", " \n", " \n", " 3\n", " олесеві ще більше захотілось чаю\n", " \n", " \n", " 4\n", " вирішуємо тут поснідати і з годину відпочити бо люди й коні потомлені\n", " \n", " \n", " 5\n", " воротилову сотню найдужче боявся він дрібного дощу який почав сіятись удосвіта й міг зашкодити\n", " \n", " \n", " 6\n", " люта злість піднялася в душі хлопця при вигляді оцього свого тирана\n", " \n", " \n", " 7\n", " також цього року в столиці виникла низка профспілкових організацій і був створений робочий клуб\n", " \n", " \n", " 8\n", " накидав того літа а вони в наших плавнях затрималися всю січ мені засмерділи\n", " \n", " \n", " 9\n", " їздять коло нас\n", " \n", " \n", "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "show_random_elements(common_voice_train.remove_columns([\"path\",\"audio\"]))" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "id": "LwCshNbbeRZR" }, "outputs": [], "source": [ "def extract_all_chars(batch):\n", " all_text = \" \".join(batch[\"sentence\"])\n", " vocab = list(set(all_text))\n", " return {\"vocab\": [vocab], \"all_text\": [all_text]}" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 81, "referenced_widgets": [ "116786d9364a4a57b521cddaabeda688", "9baa2f69aa9c4387bf1086a04ed78420", "a1e2c04dc2cb45ea80bec125e3dbf56f", "b6d46d40efa14b21814f41531f5a2f41", "d8bf8dc5d6c84140a4e96c9c435b8f17", "04ec68b059df4c628839c3ac29e2ebdd", "427056895c674c428400bee0f5b43995", "d518f2c2ab6945b78a6d336dad6262bd", "77f1a51099b24831ad8b2be3d2dc833a", "5815ae1348994bfebba4a8e968489a96", "22ba979142074f1d976e1a905544fd2d", "8b6b7f28751c45c8869aa86eb2a0ab26", "445c84e1e2e541f2a54fb989def386ae", "68502fb433564eee8dfdf272ed7e4f56", "1f3abdf2e0f6459da4179a94d691c4c4", "48c60be3ca9349a295b83f65769c7f27", "6c80bd8a8fe14a5989fe27445c14650f", "5c2a7fea8c434d51ada69a0854b88baf", "414efa8a08cd491cb78af8a95a151daa", "c31a747e18df4b4aa4449a30e387448c", "3dedffa30b774426bd474072a3a0d591", "05d8496d54174ae298c319b0194fc710" ] }, "executionInfo": { "elapsed": 560, "status": "ok", "timestamp": 1641588812313, "user": { "displayName": "Yurii Paniv", "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64", "userId": "13095662915325887123" }, "user_tz": -120 }, "id": "_m6uUjjcfbjH", "outputId": "4cc94e18-9295-4414-c611-c98916fe3d4d" }, "outputs": [ { "data": { "application/json": { "ascii": false, "bar_format": null, "colour": null, "elapsed": 0.009069681167602539, "initial": 0, "n": 0, "ncols": null, "nrows": null, "postfix": null, "prefix": "", "rate": null, "total": 1, "unit": "ba", "unit_divisor": 1000, "unit_scale": false }, "application/vnd.jupyter.widget-view+json": { "model_id": "21abcee2f4f6401096ebfcc2b283f704", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/1 [00:00\n", " \n", " Your browser does not support the audio element.\n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import IPython.display as ipd\n", "import numpy as np\n", "import random\n", "\n", "rand_int = random.randint(0, len(common_voice_train)-1)\n", "\n", "print(common_voice_train[rand_int][\"sentence\"])\n", "ipd.Audio(data=common_voice_train[rand_int][\"audio\"][\"array\"], autoplay=True, rate=16000)" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "executionInfo": { "elapsed": 22, "status": "ok", "timestamp": 1641588821176, "user": { "displayName": "Yurii Paniv", "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64", "userId": "13095662915325887123" }, "user_tz": -120 }, "id": "1Po2g7YPuRTx", "outputId": "ad79ec8a-ab5a-4c52-edfa-a20d0eec2282" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Target text: от їхало якихось із десять на конях назустріч\n", "Input array shape: (73152,)\n", "Sampling rate: 16000\n" ] } ], "source": [ "rand_int = random.randint(0, len(common_voice_train)-1)\n", "\n", "print(\"Target text:\", common_voice_train[rand_int][\"sentence\"])\n", "print(\"Input array shape:\", common_voice_train[rand_int][\"audio\"][\"array\"].shape)\n", "print(\"Sampling rate:\", common_voice_train[rand_int][\"audio\"][\"sampling_rate\"])" ] }, { "cell_type": "code", "execution_count": 32, "metadata": { "id": "eJY7I0XAwe9p" }, "outputs": [], "source": [ "def prepare_dataset(batch):\n", " audio = batch[\"audio\"]\n", "\n", " # batched output is \"un-batched\"\n", " batch[\"input_values\"] = processor(audio[\"array\"], sampling_rate=audio[\"sampling_rate\"]).input_values[0]\n", " batch[\"input_length\"] = len(batch[\"input_values\"])\n", " \n", " with processor.as_target_processor():\n", " batch[\"labels\"] = processor(batch[\"sentence\"]).input_ids\n", " return batch" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 87, "referenced_widgets": [ "a29f88f174f8499082fbb36a36c47fa4", "efc3bc0c48124ebeb79d245216eaf0fe", "d45747150d0b434593a3a7c98399599a", "ea73f7deb1c643f7b81de7fb7acaaf1b", "18bc63944343440f837cdff76db004fc", "9c875952cdd649a5bab87de9bb3f5200", "aa329cb93df44a6da6012c7cc49d7489", "b39b6e9131ca4ce3b31e84ceb04e1b83", "c5eed102ef134a4e8ca41713b82ff6a4", "e6e50da6516847878309fdc5c463edb3", "a4ae510b4f3845f891a796cf844fc2bb" ] }, "executionInfo": { "elapsed": 107521, "status": "ok", "timestamp": 1641588928679, "user": { "displayName": "Yurii Paniv", "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64", "userId": "13095662915325887123" }, "user_tz": -120 }, "id": "-np9xYK-wl8q", "outputId": "779b4637-0606-4cc8-be3c-16c1c4241e63" }, "outputs": [ { "data": { "application/json": { "ascii": false, "bar_format": null, "colour": null, "elapsed": 0.00739741325378418, "initial": 0, "n": 0, "ncols": null, "nrows": null, "postfix": null, "prefix": "", "rate": null, "total": 11463, "unit": "ex", "unit_divisor": 1000, "unit_scale": false }, "application/vnd.jupyter.widget-view+json": { "model_id": "c51a283b0cf149d7a84ade53f6eb40d9", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/11463 [00:00 Dict[str, torch.Tensor]:\n", " # split inputs and labels since they have to be of different lenghts and need\n", " # different padding methods\n", " input_features = [{\"input_values\": feature[\"input_values\"]} for feature in features]\n", " label_features = [{\"input_ids\": feature[\"labels\"]} for feature in features]\n", "\n", " batch = self.processor.pad(\n", " input_features,\n", " padding=self.padding,\n", " return_tensors=\"pt\",\n", " )\n", " with self.processor.as_target_processor():\n", " labels_batch = self.processor.pad(\n", " label_features,\n", " padding=self.padding,\n", " return_tensors=\"pt\",\n", " )\n", "\n", " # replace padding with -100 to ignore loss correctly\n", " labels = labels_batch[\"input_ids\"].masked_fill(labels_batch.attention_mask.ne(1), -100)\n", "\n", " batch[\"labels\"] = labels\n", "\n", " return batch" ] }, { "cell_type": "code", "execution_count": 36, "metadata": { "id": "lbQf5GuZyQ4_" }, "outputs": [], "source": [ "data_collator = DataCollatorCTCWithPadding(processor=processor, padding=True)" ] }, { "cell_type": "code", "execution_count": 37, "metadata": { "id": "9Xsux2gmyXso" }, "outputs": [], "source": [ "wer_metric = load_metric(\"wer\")\n", "cer_metric = load_metric(\"cer\")\n", "metrics = [wer_metric, cer_metric]" ] }, { "cell_type": "code", "execution_count": 38, "metadata": { "id": "1XZ-kjweyTy_" }, "outputs": [], "source": [ "def compute_metrics(pred):\n", " pred_logits = pred.predictions\n", " pred_ids = np.argmax(pred_logits, axis=-1)\n", "\n", " pred.label_ids[pred.label_ids == -100] = processor.tokenizer.pad_token_id\n", "\n", " pred_str = processor.batch_decode(pred_ids)\n", " # we do not want to group tokens when computing the metrics\n", " label_str = processor.batch_decode(pred.label_ids, group_tokens=False)\n", "\n", " wer = wer_metric.compute(predictions=pred_str, references=label_str)\n", " cer = cer_metric.compute(predictions=pred_str, references=label_str)\n", "\n", " return {\"wer\": wer, \"cer\": cer}" ] }, { "cell_type": "code", "execution_count": 39, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "executionInfo": { "elapsed": 9496, "status": "ok", "timestamp": 1641588938616, "user": { "displayName": "Yurii Paniv", "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64", "userId": "13095662915325887123" }, "user_tz": -120 }, "id": "e7cqAWIayn6w", "outputId": "b7b20ce9-e1b2-473f-8032-2a75f98dfa9e" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Some weights of the model checkpoint at facebook/wav2vec2-xls-r-300m were not used when initializing Wav2Vec2ForCTC: ['quantizer.codevectors', 'project_hid.bias', 'project_q.weight', 'quantizer.weight_proj.weight', 'quantizer.weight_proj.bias', 'project_q.bias', 'project_hid.weight']\n", "- This IS expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", "Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-300m and are newly initialized: ['lm_head.bias', 'lm_head.weight']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] } ], "source": [ "from transformers import Wav2Vec2ForCTC\n", "\n", "model = Wav2Vec2ForCTC.from_pretrained(\n", " \"facebook/wav2vec2-xls-r-300m\", \n", " attention_dropout=0.3,\n", " hidden_dropout=0.3,\n", " feat_proj_dropout=0.3,\n", " mask_time_prob=0.05,\n", " layerdrop=0.3,\n", " ctc_loss_reduction=\"mean\", \n", " pad_token_id=processor.tokenizer.pad_token_id,\n", " vocab_size=len(processor.tokenizer),\n", ")" ] }, { "cell_type": "code", "execution_count": 40, "metadata": { "id": "oGI8zObtZ3V0" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/robinhad/Projects/Speech/wav2vec2-xls-r-ukrainian/.venv/lib/python3.9/site-packages/transformers/models/wav2vec2/modeling_wav2vec2.py:1618: FutureWarning: The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5.Please use the equivalent `freeze_feature_encoder` method instead.\n", " warnings.warn(\n" ] } ], "source": [ "model.freeze_feature_extractor()" ] }, { "cell_type": "code", "execution_count": 41, "metadata": { "id": "KbeKSV7uzGPP" }, "outputs": [], "source": [ "from transformers import TrainingArguments\n", "\n", "training_args = TrainingArguments(\n", " output_dir=repo_name,\n", " group_by_length=True,\n", " per_device_train_batch_size=16,\n", " gradient_accumulation_steps=6,\n", " eval_accumulation_steps=4,\n", " evaluation_strategy=\"steps\",\n", " num_train_epochs=100,\n", " gradient_checkpointing=True,\n", " fp16=True,\n", " save_steps=400,\n", " eval_steps=400,\n", " logging_steps=400,\n", " learning_rate=3e-4,\n", " warmup_steps=500,\n", " save_total_limit=2,\n", " report_to=\"tensorboard\"\n", ")" ] }, { "cell_type": "code", "execution_count": 43, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "executionInfo": { "elapsed": 11063, "status": "ok", "timestamp": 1641588949674, "user": { "displayName": "Yurii Paniv", "photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64", "userId": "13095662915325887123" }, "user_tz": -120 }, "id": "rY7vBmFCPFgC", "outputId": "2e89d5ea-5b25-44bf-8492-a6220b0b1c38" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Using cuda_amp half precision backend\n" ] } ], "source": [ "from transformers import Trainer\n", "\n", "trainer = Trainer(\n", " model=model,\n", " data_collator=data_collator,\n", " args=training_args,\n", " compute_metrics=compute_metrics,\n", " train_dataset=common_voice_train,\n", " eval_dataset=common_voice_test,\n", " tokenizer=processor.feature_extractor,\n", ")" ] }, { "cell_type": "code", "execution_count": 44, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 409 }, "id": "9fRr9TG5pGBl", "outputId": "c2a7c797-326c-4bd2-b167-9d2f41d77def" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "The following columns in the training set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n", "/home/robinhad/Projects/Speech/wav2vec2-xls-r-ukrainian/.venv/lib/python3.9/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n", " warnings.warn(\n", "***** Running training *****\n", " Num examples = 11463\n", " Num Epochs = 100\n", " Instantaneous batch size per device = 16\n", " Total train batch size (w. parallel, distributed & accumulation) = 96\n", " Gradient Accumulation steps = 6\n", " Total optimization steps = 11900\n" ] }, { "data": { "application/json": { "ascii": false, "bar_format": null, "colour": null, "elapsed": 0.007272958755493164, "initial": 0, "n": 0, "ncols": null, "nrows": null, "postfix": null, "prefix": "", "rate": null, "total": 11900, "unit": "it", "unit_divisor": 1000, "unit_scale": false }, "application/vnd.jupyter.widget-view+json": { "model_id": "deebda57b25f4f95b4915d7a8d479a62", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/11900 [00:00 to the vocabulary\n", "Adding to the vocabulary\n" ] } ], "source": [ "from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor\n", "model = Wav2Vec2ForCTC.from_pretrained(repo_name + \"/checkpoint-11200\").to(\"cuda\")\n", "processor = Wav2Vec2Processor.from_pretrained(repo_name)" ] }, { "cell_type": "markdown", "metadata": { "id": "jD7TZ1YS3S_K" }, "source": [ "\n", "Now, we will just take the first example of the test set, run it through the model and take the `argmax(...)` of the logits to retrieve the predicted token ids." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "pax07TnL3WZn" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "It is strongly recommended to pass the ``sampling_rate`` argument to this function. Failing to do so can result in silent errors that might be hard to debug.\n" ] } ], "source": [ "audio_id = 10\n", "\n", "input_dict = processor(common_voice_test[\"input_values\"], return_tensors=\"pt\", padding=True)\n", "\n", "logits = model(input_dict.input_values.to(\"cuda\")).logits\n", "\n", "pred_ids = torch.argmax(logits, dim=-1)[audio_id]\n", "\n", "common_voice_test_transcription = load_dataset(\"common_voice\", \"uk\", split=\"test\")\n", "\n", "print(\"Prediction:\")\n", "print(processor.decode(pred_ids))\n", "\n", "print(\"\\nReference:\")\n", "print(common_voice_test_transcription[audio_id][\"sentence\"].lower())" ] } ], "metadata": { "accelerator": "GPU", "colab": { "collapsed_sections": [], "machine_shape": "hm", "name": "Копія записника \"Fine-Tune XLS-R on Common Voice.ipynb\"", "provenance": [ { "file_id": "https://github.com/patrickvonplaten/notebooks/blob/master/Fine_Tune_XLS_R_on_Common_Voice.ipynb", "timestamp": 1641583715050 } ] }, "kernelspec": { "display_name": "Python 3.9.13 (conda)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.13" }, "vscode": { "interpreter": { "hash": "a5cdd9abf8df3af0fd61fdb3838d6c6f2f66a9ba4bf4484f45cd88abf9f04fe9" } }, "widgets": { "application/vnd.jupyter.widget-state+json": { "04ec68b059df4c628839c3ac29e2ebdd": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "05d8496d54174ae298c319b0194fc710": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "116786d9364a4a57b521cddaabeda688": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_a1e2c04dc2cb45ea80bec125e3dbf56f", "IPY_MODEL_b6d46d40efa14b21814f41531f5a2f41", "IPY_MODEL_d8bf8dc5d6c84140a4e96c9c435b8f17" ], "layout": "IPY_MODEL_9baa2f69aa9c4387bf1086a04ed78420" } }, "18bc63944343440f837cdff76db004fc": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_a4ae510b4f3845f891a796cf844fc2bb", "placeholder": "​", "style": "IPY_MODEL_e6e50da6516847878309fdc5c463edb3", "value": " 6962/6962 [01:46<00:00, 78.15ex/s]" } }, "1f3abdf2e0f6459da4179a94d691c4c4": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_c31a747e18df4b4aa4449a30e387448c", "max": 1, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_414efa8a08cd491cb78af8a95a151daa", "value": 1 } }, "22ba979142074f1d976e1a905544fd2d": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "3dedffa30b774426bd474072a3a0d591": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "414efa8a08cd491cb78af8a95a151daa": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "427056895c674c428400bee0f5b43995": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "445c84e1e2e541f2a54fb989def386ae": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "48c60be3ca9349a295b83f65769c7f27": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_05d8496d54174ae298c319b0194fc710", "placeholder": "​", "style": "IPY_MODEL_3dedffa30b774426bd474072a3a0d591", "value": " 1/1 [00:00<00:00, 11.09ba/s]" } }, "5815ae1348994bfebba4a8e968489a96": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "5c2a7fea8c434d51ada69a0854b88baf": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "68502fb433564eee8dfdf272ed7e4f56": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_5c2a7fea8c434d51ada69a0854b88baf", "placeholder": "​", "style": "IPY_MODEL_6c80bd8a8fe14a5989fe27445c14650f", "value": "100%" } }, "6c80bd8a8fe14a5989fe27445c14650f": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "77f1a51099b24831ad8b2be3d2dc833a": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "8b6b7f28751c45c8869aa86eb2a0ab26": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_68502fb433564eee8dfdf272ed7e4f56", "IPY_MODEL_1f3abdf2e0f6459da4179a94d691c4c4", "IPY_MODEL_48c60be3ca9349a295b83f65769c7f27" ], "layout": "IPY_MODEL_445c84e1e2e541f2a54fb989def386ae" } }, "9baa2f69aa9c4387bf1086a04ed78420": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "9c875952cdd649a5bab87de9bb3f5200": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "a1e2c04dc2cb45ea80bec125e3dbf56f": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_427056895c674c428400bee0f5b43995", "placeholder": "​", "style": "IPY_MODEL_04ec68b059df4c628839c3ac29e2ebdd", "value": "100%" } }, "a29f88f174f8499082fbb36a36c47fa4": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_d45747150d0b434593a3a7c98399599a", "IPY_MODEL_ea73f7deb1c643f7b81de7fb7acaaf1b", "IPY_MODEL_18bc63944343440f837cdff76db004fc" ], "layout": "IPY_MODEL_efc3bc0c48124ebeb79d245216eaf0fe" } }, "a4ae510b4f3845f891a796cf844fc2bb": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "aa329cb93df44a6da6012c7cc49d7489": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "b39b6e9131ca4ce3b31e84ceb04e1b83": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "b6d46d40efa14b21814f41531f5a2f41": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_77f1a51099b24831ad8b2be3d2dc833a", "max": 1, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_d518f2c2ab6945b78a6d336dad6262bd", "value": 1 } }, "c31a747e18df4b4aa4449a30e387448c": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "c5eed102ef134a4e8ca41713b82ff6a4": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "d45747150d0b434593a3a7c98399599a": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_aa329cb93df44a6da6012c7cc49d7489", "placeholder": "​", "style": "IPY_MODEL_9c875952cdd649a5bab87de9bb3f5200", "value": "100%" } }, "d518f2c2ab6945b78a6d336dad6262bd": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "d8bf8dc5d6c84140a4e96c9c435b8f17": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_22ba979142074f1d976e1a905544fd2d", "placeholder": "​", "style": "IPY_MODEL_5815ae1348994bfebba4a8e968489a96", "value": " 1/1 [00:00<00:00, 7.95ba/s]" } }, "e6e50da6516847878309fdc5c463edb3": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "ea73f7deb1c643f7b81de7fb7acaaf1b": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_c5eed102ef134a4e8ca41713b82ff6a4", "max": 6962, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_b39b6e9131ca4ce3b31e84ceb04e1b83", "value": 6962 } }, "efc3bc0c48124ebeb79d245216eaf0fe": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } } } } }, "nbformat": 4, "nbformat_minor": 4 }