{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"executionInfo": {
"elapsed": 829,
"status": "ok",
"timestamp": 1641588786523,
"user": {
"displayName": "Yurii Paniv",
"photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64",
"userId": "13095662915325887123"
},
"user_tz": -120
},
"id": "YELVqGxMxnbG",
"outputId": "876761c1-2e03-411b-e61b-07ac4ad61377"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Fri Sep 2 01:31:23 2022 \n",
"+-----------------------------------------------------------------------------+\n",
"| NVIDIA-SMI 515.65.01 Driver Version: 515.65.01 CUDA Version: 11.7 |\n",
"|-------------------------------+----------------------+----------------------+\n",
"| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n",
"| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n",
"| | | MIG M. |\n",
"|===============================+======================+======================|\n",
"| 0 NVIDIA GeForce ... Off | 00000000:0A:00.0 On | N/A |\n",
"| 0% 35C P5 52W / 390W | 1231MiB / 24576MiB | 34% Default |\n",
"| | | N/A |\n",
"+-------------------------------+----------------------+----------------------+\n",
" \n",
"+-----------------------------------------------------------------------------+\n",
"| Processes: |\n",
"| GPU GI CI PID Type Process name GPU Memory |\n",
"| ID ID Usage |\n",
"|=============================================================================|\n",
"| 0 N/A N/A 1216 G /usr/lib/xorg/Xorg 485MiB |\n",
"| 0 N/A N/A 1601 G /usr/bin/kwin_x11 97MiB |\n",
"| 0 N/A N/A 1650 G /usr/bin/plasmashell 64MiB |\n",
"| 0 N/A N/A 1747 G telegram-desktop 4MiB |\n",
"| 0 N/A N/A 4701 G ...5/usr/lib/firefox/firefox 175MiB |\n",
"| 0 N/A N/A 804722 G ...RendererForSitePerProcess 363MiB |\n",
"| 0 N/A N/A 867357 G ...996071496053229024,131072 35MiB |\n",
"+-----------------------------------------------------------------------------+\n"
]
}
],
"source": [
"gpu_info = !nvidia-smi\n",
"gpu_info = '\\n'.join(gpu_info)\n",
"if gpu_info.find('failed') >= 0:\n",
" print('Not connected to a GPU')\n",
"else:\n",
" print(gpu_info)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "c8eh87Hoee5d"
},
"outputs": [],
"source": [
"#%%capture\n",
"#!pip install datasets==1.13.3\n",
"#!pip install transformers==4.11.3\n",
"#!pip install huggingface_hub==0.1\n",
"#!pip install torchaudio==0.10.0+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html\n",
"#!pip install jiwer"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"executionInfo": {
"elapsed": 5334,
"status": "ok",
"timestamp": 1641588811766,
"user": {
"displayName": "Yurii Paniv",
"photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64",
"userId": "13095662915325887123"
},
"user_tz": -120
},
"id": "2MMXcWFFgCXU",
"outputId": "be9fd72e-4395-4cd0-ff87-631dad046e71"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Reusing dataset common_voice_10_0 (/home/robinhad/.cache/huggingface/datasets/mozilla-foundation___common_voice_10_0/uk/10.0.0/27df768ab1b5cac48a7616f145b79b62599167b0ffa2e054bf4c3e74e9619e5e)\n",
"Reusing dataset common_voice_10_0 (/home/robinhad/.cache/huggingface/datasets/mozilla-foundation___common_voice_10_0/uk/10.0.0/27df768ab1b5cac48a7616f145b79b62599167b0ffa2e054bf4c3e74e9619e5e)\n"
]
}
],
"source": [
"from datasets import load_dataset, load_metric, Audio\n",
"\n",
"common_voice_train = load_dataset(\"mozilla-foundation/common_voice_10_0\", \"uk\", split=\"train\", use_auth_token=True)\n",
"common_voice_test = load_dataset(\"mozilla-foundation/common_voice_10_0\", \"uk\", split=\"test\", use_auth_token=True)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Dataset({\n",
" features: ['client_id', 'path', 'audio', 'sentence', 'up_votes', 'down_votes', 'age', 'gender', 'accent', 'locale', 'segment'],\n",
" num_rows: 11463\n",
"})"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"common_voice_train"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"common_voice_train.cleanup_cache_files()\n",
"common_voice_test.cleanup_cache_files()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"id": "kbyq6lDgQc2a"
},
"outputs": [],
"source": [
"common_voice_train = common_voice_train.remove_columns([\"accent\", \"age\", \"client_id\", \"down_votes\", \"gender\", \"locale\", \"segment\", \"up_votes\"])\n",
"common_voice_test = common_voice_test.remove_columns([\"accent\", \"age\", \"client_id\", \"down_votes\", \"gender\", \"locale\", \"segment\", \"up_votes\"])"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"id": "72737oog2F6U"
},
"outputs": [],
"source": [
"from datasets import ClassLabel\n",
"import random\n",
"import pandas as pd\n",
"from IPython.display import display, HTML\n",
"\n",
"def show_random_elements(dataset, num_examples=10):\n",
" assert num_examples <= len(dataset), \"Can't pick more elements than there are in the dataset.\"\n",
" picks = []\n",
" for _ in range(num_examples):\n",
" pick = random.randint(0, len(dataset)-1)\n",
" while pick in picks:\n",
" pick = random.randint(0, len(dataset)-1)\n",
" picks.append(pick)\n",
" \n",
" df = pd.DataFrame(dataset[picks])\n",
" display(HTML(df.to_html()))"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 363
},
"executionInfo": {
"elapsed": 39,
"status": "ok",
"timestamp": 1641588811771,
"user": {
"displayName": "Yurii Paniv",
"photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64",
"userId": "13095662915325887123"
},
"user_tz": -120
},
"id": "K_JUmf3G3b9S",
"outputId": "8603c909-09e1-43ae-f7c2-b27b25d795a3"
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
" \n",
" \n",
" | \n",
" sentence | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Вона нас не лякає. | \n",
"
\n",
" \n",
" 1 | \n",
" Бейнбрідж затримався, готуючи екіпажі, й фактично не встиг узяти участі у війні. | \n",
"
\n",
" \n",
" 2 | \n",
" А тепер. | \n",
"
\n",
" \n",
" 3 | \n",
" Наші \"будьонівці\", ніби з цікавості, зібралися й оточили червоні шеренги. | \n",
"
\n",
" \n",
" 4 | \n",
" Серед квітів я вмирав, Серед хмар я воскресав. | \n",
"
\n",
" \n",
" 5 | \n",
" Сьогодні виробництво полімерів найбільша галузь хімічної промисловості. | \n",
"
\n",
" \n",
" 6 | \n",
" Хмельницький заплатив за все на цілий рік наперед. | \n",
"
\n",
" \n",
" 7 | \n",
" Соловій же залишився підпалити бікфордів шнур. | \n",
"
\n",
" \n",
" 8 | \n",
" Тоді його слово буде хвилювати, захоплювати, піднімати людську душу. | \n",
"
\n",
" \n",
" 9 | \n",
" Тут були яблуні, сливи, вишні, — вишень найбільше. | \n",
"
\n",
" \n",
"
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"show_random_elements(common_voice_train.remove_columns([\"path\", \"audio\"]), num_examples=10)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"executionInfo": {
"elapsed": 30,
"status": "ok",
"timestamp": 1641588811775,
"user": {
"displayName": "Yurii Paniv",
"photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64",
"userId": "13095662915325887123"
},
"user_tz": -120
},
"id": "XIHocAuTQbBR",
"outputId": "e8392853-e0d1-45ba-df74-065c50565654"
},
"outputs": [
{
"data": {
"application/json": {
"ascii": false,
"bar_format": null,
"colour": null,
"elapsed": 0.00995326042175293,
"initial": 0,
"n": 0,
"ncols": null,
"nrows": null,
"postfix": null,
"prefix": "",
"rate": null,
"total": 12,
"unit": "ba",
"unit_divisor": 1000,
"unit_scale": false
},
"application/vnd.jupyter.widget-view+json": {
"model_id": "4c0c81459dfb4ede8f0ec6fe25a0807e",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/12 [00:00, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/json": {
"ascii": false,
"bar_format": null,
"colour": null,
"elapsed": 0.007294893264770508,
"initial": 0,
"n": 0,
"ncols": null,
"nrows": null,
"postfix": null,
"prefix": "",
"rate": null,
"total": 7,
"unit": "ba",
"unit_divisor": 1000,
"unit_scale": false
},
"application/vnd.jupyter.widget-view+json": {
"model_id": "66c16ae632444339ae8ec80070398586",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/7 [00:00, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"filter_func = lambda x: not (\"joki\" in x or \"ы\" in x)\n",
"common_voice_train = common_voice_train.filter(filter_func, input_columns=[\"sentence\"])\n",
"common_voice_test = common_voice_test.filter(filter_func, input_columns=[\"sentence\"])"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"id": "ZcVsD0ETElrR"
},
"outputs": [
{
"data": {
"text/plain": [
"{'sentence': \"привіт як у тебе справи загалом м'якотілий друже\"}"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def cleaner(batch):\n",
" replace_as_space = \"!:;,—…–“”?\\\"«»\"\n",
" special_words = {\n",
" \"ХIХ\": \"дев'ятнадцятого\",\n",
" \"Linux\": \"Лінукс\",\n",
" \"Maace\": \"Маасе\",\n",
" \"м 'ясо\": \"м'ясо\",\n",
" \"'іде\": \"іде\",\n",
" \"Д'Аламбер\": \"даламбер\",\n",
" \" - \": \" \",\n",
" \"--\": \" \",\n",
" \"....\": \" \",\n",
" \"...\": \" \",\n",
" \"..\": \" \",\n",
" \" '\": \" \",\n",
" \"О'\": \"о\",\n",
" \"-\": \"\" #further check needed\n",
" }\n",
" # check abbreviations later\n",
" abbreviations = {\n",
" 'ЧК': \"чека\",\n",
" 'ҐПУ': \"ґепеу\",\n",
" 'ЄС.': \"єес\",\n",
" 'УНР': \"уенер\",\n",
" 'ДТП.': \"детепе\",\n",
" 'РНБО': \"еренбео\",\n",
" 'СРСР': \"есересер\",\n",
" 'ДБР': \"дебеер\",\n",
" 'КП': \"капе\",\n",
" 'ОС': \"оес\",\n",
" } \n",
" chars_dict = {\n",
" \"C\": \"С\",\n",
" \"I\": \"І\",\n",
" \"P\": \"Р\",\n",
" \"a\": \"а\",\n",
" \"e\": \"е\",\n",
" \"x\": \"х\",\n",
" \"y\": \"у\",\n",
" \"p\": \"р\",\n",
" \"o\": \"о\",\n",
" \"i\": \"і\",\n",
" \"\\u0301\": \"\",\n",
" \"`\": \"'\",\n",
" \"՚\": \"'\",\n",
" \".\": \" \",\n",
" \"’\": \"'\"\n",
" \n",
" }\n",
" for word in special_words.keys():\n",
" batch[\"sentence\"] = batch[\"sentence\"].replace(word, special_words[word])\n",
" for word in abbreviations.keys():\n",
" batch[\"sentence\"] = batch[\"sentence\"].replace(word, abbreviations[word])\n",
" for char in chars_dict.keys():\n",
" batch[\"sentence\"] = batch[\"sentence\"].replace(char, chars_dict[char])\n",
" for char in replace_as_space:\n",
" batch[\"sentence\"] = batch[\"sentence\"].replace(char, \" \")\n",
" batch[\"sentence\"] = \" \".join(filter(lambda x: x != \"\", batch[\"sentence\"].strip().lower().split(\" \")))\n",
" return batch\n",
"\n",
"sentence = {\"sentence\": \"Привіт, - як у тебе справи загалом, м'якотілий друже?\"}\n",
"cleaner(sentence)"
]
},
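{
"cell_type": "markdown",
"metadata": {},
"source": [
"To make the replacement tables above concrete, the next cell (an illustrative addition, not part of the original run) traces `cleaner` on a sentence containing two of the listed abbreviations."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative example: 'СРСР' and 'ЄС.' are expanded via the abbreviations table,\n",
"# then the text is lowercased and extra whitespace is collapsed.\n",
"cleaner({\"sentence\": \"У СРСР та ЄС.\"})\n",
"# expected: {'sentence': 'у есересер та єес'}"
]
},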
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 401
},
"executionInfo": {
"elapsed": 32,
"status": "ok",
"timestamp": 1641588811774,
"user": {
"displayName": "Yurii Paniv",
"photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64",
"userId": "13095662915325887123"
},
"user_tz": -120
},
"id": "6falIJSBED65",
"outputId": "2f0ca829-dbfa-4d70-ee4a-ded2ae342117"
},
"outputs": [
{
"data": {
"application/json": {
"ascii": false,
"bar_format": null,
"colour": null,
"elapsed": 0.00716710090637207,
"initial": 0,
"n": 0,
"ncols": null,
"nrows": null,
"postfix": null,
"prefix": "",
"rate": null,
"total": 11463,
"unit": "ex",
"unit_divisor": 1000,
"unit_scale": false
},
"application/vnd.jupyter.widget-view+json": {
"model_id": "ca29db891c7f4d0cbd328a65477d2392",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/11463 [00:00, ?ex/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
" \n",
" \n",
" | \n",
" sentence | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" так так усе на світі кінчається | \n",
"
\n",
" \n",
" 1 | \n",
" комуністів тільки й є що воєнком та два ротні політруки | \n",
"
\n",
" \n",
" 2 | \n",
" уже й убитих чимало | \n",
"
\n",
" \n",
" 3 | \n",
" трупів не закопували | \n",
"
\n",
" \n",
" 4 | \n",
" до фастова дісталася з якимось польським обозом | \n",
"
\n",
" \n",
" 5 | \n",
" невже то ви були | \n",
"
\n",
" \n",
" 6 | \n",
" при отій купці отої нещасної духовної братії | \n",
"
\n",
" \n",
" 7 | \n",
" вирішуємо напасти на бригаду по дорозі не допустивши до села | \n",
"
\n",
" \n",
" 8 | \n",
" де то хто таке видав аби хлопи купували панські маєтки | \n",
"
\n",
" \n",
" 9 | \n",
" коні пішли з коноводами в балку | \n",
"
\n",
" \n",
"
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"show_random_elements(common_voice_train.map(cleaner).remove_columns([\"path\", \"audio\"]), num_examples=10)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Loading cached processed dataset at /home/robinhad/.cache/huggingface/datasets/mozilla-foundation___common_voice_10_0/uk/10.0.0/27df768ab1b5cac48a7616f145b79b62599167b0ffa2e054bf4c3e74e9619e5e/cache-890587fbc5f83609.arrow\n"
]
},
{
"data": {
"application/json": {
"ascii": false,
"bar_format": null,
"colour": null,
"elapsed": 0.007320880889892578,
"initial": 0,
"n": 0,
"ncols": null,
"nrows": null,
"postfix": null,
"prefix": "",
"rate": null,
"total": 6783,
"unit": "ex",
"unit_divisor": 1000,
"unit_scale": false
},
"application/vnd.jupyter.widget-view+json": {
"model_id": "8261bf1a7bd747fb88f7e063c24273d4",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/6783 [00:00, ?ex/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"common_voice_train = common_voice_train.map(cleaner)\n",
"common_voice_test = common_voice_test.map(cleaner)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 363
},
"executionInfo": {
"elapsed": 24,
"status": "ok",
"timestamp": 1641588811775,
"user": {
"displayName": "Yurii Paniv",
"photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64",
"userId": "13095662915325887123"
},
"user_tz": -120
},
"id": "RBDRAAYxRE6n",
"outputId": "a16beae1-84e6-4388-d601-2ed3a92bf451"
},
"outputs": [
{
"data": {
"text/html": [
"\n",
" \n",
" \n",
" | \n",
" sentence | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" я замилувався маневруванням тачанок на полях | \n",
"
\n",
" \n",
" 1 | \n",
" андрій із бугаєм вилізли на близький горб роздивилися | \n",
"
\n",
" \n",
" 2 | \n",
" вона нам потрібна як щоденний хліб | \n",
"
\n",
" \n",
" 3 | \n",
" олесеві ще більше захотілось чаю | \n",
"
\n",
" \n",
" 4 | \n",
" вирішуємо тут поснідати і з годину відпочити бо люди й коні потомлені | \n",
"
\n",
" \n",
" 5 | \n",
" воротилову сотню найдужче боявся він дрібного дощу який почав сіятись удосвіта й міг зашкодити | \n",
"
\n",
" \n",
" 6 | \n",
" люта злість піднялася в душі хлопця при вигляді оцього свого тирана | \n",
"
\n",
" \n",
" 7 | \n",
" також цього року в столиці виникла низка профспілкових організацій і був створений робочий клуб | \n",
"
\n",
" \n",
" 8 | \n",
" накидав того літа а вони в наших плавнях затрималися всю січ мені засмерділи | \n",
"
\n",
" \n",
" 9 | \n",
" їздять коло нас | \n",
"
\n",
" \n",
"
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"show_random_elements(common_voice_train.remove_columns([\"path\",\"audio\"]))"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"id": "LwCshNbbeRZR"
},
"outputs": [],
"source": [
"def extract_all_chars(batch):\n",
" all_text = \" \".join(batch[\"sentence\"])\n",
" vocab = list(set(all_text))\n",
" return {\"vocab\": [vocab], \"all_text\": [all_text]}"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 81,
"referenced_widgets": [
"116786d9364a4a57b521cddaabeda688",
"9baa2f69aa9c4387bf1086a04ed78420",
"a1e2c04dc2cb45ea80bec125e3dbf56f",
"b6d46d40efa14b21814f41531f5a2f41",
"d8bf8dc5d6c84140a4e96c9c435b8f17",
"04ec68b059df4c628839c3ac29e2ebdd",
"427056895c674c428400bee0f5b43995",
"d518f2c2ab6945b78a6d336dad6262bd",
"77f1a51099b24831ad8b2be3d2dc833a",
"5815ae1348994bfebba4a8e968489a96",
"22ba979142074f1d976e1a905544fd2d",
"8b6b7f28751c45c8869aa86eb2a0ab26",
"445c84e1e2e541f2a54fb989def386ae",
"68502fb433564eee8dfdf272ed7e4f56",
"1f3abdf2e0f6459da4179a94d691c4c4",
"48c60be3ca9349a295b83f65769c7f27",
"6c80bd8a8fe14a5989fe27445c14650f",
"5c2a7fea8c434d51ada69a0854b88baf",
"414efa8a08cd491cb78af8a95a151daa",
"c31a747e18df4b4aa4449a30e387448c",
"3dedffa30b774426bd474072a3a0d591",
"05d8496d54174ae298c319b0194fc710"
]
},
"executionInfo": {
"elapsed": 560,
"status": "ok",
"timestamp": 1641588812313,
"user": {
"displayName": "Yurii Paniv",
"photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64",
"userId": "13095662915325887123"
},
"user_tz": -120
},
"id": "_m6uUjjcfbjH",
"outputId": "4cc94e18-9295-4414-c611-c98916fe3d4d"
},
"outputs": [
{
"data": {
"application/json": {
"ascii": false,
"bar_format": null,
"colour": null,
"elapsed": 0.009069681167602539,
"initial": 0,
"n": 0,
"ncols": null,
"nrows": null,
"postfix": null,
"prefix": "",
"rate": null,
"total": 1,
"unit": "ba",
"unit_divisor": 1000,
"unit_scale": false
},
"application/vnd.jupyter.widget-view+json": {
"model_id": "21abcee2f4f6401096ebfcc2b283f704",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/1 [00:00, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/json": {
"ascii": false,
"bar_format": null,
"colour": null,
"elapsed": 0.007071256637573242,
"initial": 0,
"n": 0,
"ncols": null,
"nrows": null,
"postfix": null,
"prefix": "",
"rate": null,
"total": 1,
"unit": "ba",
"unit_divisor": 1000,
"unit_scale": false
},
"application/vnd.jupyter.widget-view+json": {
"model_id": "82e2b6e9482345ba913c5800eab41275",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/1 [00:00, ?ba/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"vocab_train = common_voice_train.map(extract_all_chars, batched=True, batch_size=-1, keep_in_memory=True, remove_columns=common_voice_train.column_names)\n",
"vocab_test = common_voice_test.map(extract_all_chars, batched=True, batch_size=-1, keep_in_memory=True, remove_columns=common_voice_test.column_names)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"id": "aQfneNsmlJI0"
},
"outputs": [],
"source": [
"vocab_list = list(set(vocab_train[\"vocab\"][0]) | set(vocab_test[\"vocab\"][0]))"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"executionInfo": {
"elapsed": 18,
"status": "ok",
"timestamp": 1641588812314,
"user": {
"displayName": "Yurii Paniv",
"photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64",
"userId": "13095662915325887123"
},
"user_tz": -120
},
"id": "_0kRndSvqaKk",
"outputId": "35c48e76-5060-470b-8405-bd6d288296ea"
},
"outputs": [
{
"data": {
"text/plain": [
"{' ': 0,\n",
" \"'\": 1,\n",
" 'а': 2,\n",
" 'б': 3,\n",
" 'в': 4,\n",
" 'г': 5,\n",
" 'д': 6,\n",
" 'е': 7,\n",
" 'ж': 8,\n",
" 'з': 9,\n",
" 'и': 10,\n",
" 'й': 11,\n",
" 'к': 12,\n",
" 'л': 13,\n",
" 'м': 14,\n",
" 'н': 15,\n",
" 'о': 16,\n",
" 'п': 17,\n",
" 'р': 18,\n",
" 'с': 19,\n",
" 'т': 20,\n",
" 'у': 21,\n",
" 'ф': 22,\n",
" 'х': 23,\n",
" 'ц': 24,\n",
" 'ч': 25,\n",
" 'ш': 26,\n",
" 'щ': 27,\n",
" 'ь': 28,\n",
" 'ю': 29,\n",
" 'я': 30,\n",
" 'є': 31,\n",
" 'і': 32,\n",
" 'ї': 33,\n",
" 'ґ': 34}"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vocab_dict = {v: k for k, v in enumerate(sorted(vocab_list))}\n",
"vocab_dict"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"id": "npbIbBoLgaFX"
},
"outputs": [],
"source": [
"vocab_dict[\"|\"] = vocab_dict[\" \"]\n",
"del vocab_dict[\" \"]"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"executionInfo": {
"elapsed": 15,
"status": "ok",
"timestamp": 1641588812316,
"user": {
"displayName": "Yurii Paniv",
"photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64",
"userId": "13095662915325887123"
},
"user_tz": -120
},
"id": "znF0bNunsjbl",
"outputId": "480da4c9-b3d4-41c6-fc5c-b87b8b66202e"
},
"outputs": [
{
"data": {
"text/plain": [
"37"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vocab_dict[\"[UNK]\"] = len(vocab_dict)\n",
"vocab_dict[\"[PAD]\"] = len(vocab_dict)\n",
"len(vocab_dict)"
]
},
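{
"cell_type": "markdown",
"metadata": {},
"source": [
"The count checks out: 35 characters extracted from the corpus (with the space already swapped for `|`) plus the two special tokens `[UNK]` and `[PAD]` give 35 + 2 = 37 vocabulary entries."
]
},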
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"id": "ehyUoh9vk191"
},
"outputs": [],
"source": [
"import json\n",
"with open('vocab.json', 'w') as vocab_file:\n",
" json.dump(vocab_dict, vocab_file)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"executionInfo": {
"elapsed": 8013,
"status": "ok",
"timestamp": 1641588820318,
"user": {
"displayName": "Yurii Paniv",
"photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64",
"userId": "13095662915325887123"
},
"user_tz": -120
},
"id": "xriFGEWQkO4M",
"outputId": "a4497f75-d6f5-411a-d983-2ad519f65b8b"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n"
]
}
],
"source": [
"from transformers import Wav2Vec2CTCTokenizer\n",
"\n",
"tokenizer = Wav2Vec2CTCTokenizer.from_pretrained(\"./\", unk_token=\"[UNK]\", pad_token=\"[PAD]\", word_delimiter_token=\"|\")"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"id": "A1XApZBAF2zr"
},
"outputs": [],
"source": [
"repo_name = \"wav2vec2-xls-r-300m-uk\""
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"id": "kAR0-2KLkopp"
},
"outputs": [],
"source": [
"from transformers import Wav2Vec2FeatureExtractor\n",
"\n",
"feature_extractor = Wav2Vec2FeatureExtractor(feature_size=1, sampling_rate=16000, padding_value=0.0, do_normalize=True, return_attention_mask=True)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"id": "KYZtoW-tlZgl"
},
"outputs": [],
"source": [
"from transformers import Wav2Vec2Processor\n",
"\n",
"processor = Wav2Vec2Processor(feature_extractor=feature_extractor, tokenizer=tokenizer)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"# save tokenizer to folder\n",
"processor.save_pretrained(repo_name)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 54
},
"executionInfo": {
"elapsed": 18,
"status": "ok",
"timestamp": 1641588820325,
"user": {
"displayName": "Yurii Paniv",
"photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64",
"userId": "13095662915325887123"
},
"user_tz": -120
},
"id": "TTCS7W6XJ9BG",
"outputId": "18b0d44f-a498-4a79-f0a7-984fae48cad1"
},
"outputs": [
{
"data": {
"text/plain": [
"'/home/robinhad/.cache/huggingface/datasets/downloads/extracted/ee7155196e5d51620d53e48cf58eb693b7839b8ff183604c8bb948d3e0aad92d/cv-corpus-10.0-2022-07-04/uk/clips/common_voice_uk_20907128.mp3'"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"common_voice_train[0][\"path\"]"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"executionInfo": {
"elapsed": 863,
"status": "ok",
"timestamp": 1641588821172,
"user": {
"displayName": "Yurii Paniv",
"photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64",
"userId": "13095662915325887123"
},
"user_tz": -120
},
"id": "qj_z5Zc3GAs9",
"outputId": "ace70f42-dcf0-445c-9b81-b23d4089c90d"
},
"outputs": [
{
"data": {
"text/plain": [
"{'path': '/home/robinhad/.cache/huggingface/datasets/downloads/extracted/ee7155196e5d51620d53e48cf58eb693b7839b8ff183604c8bb948d3e0aad92d/cv-corpus-10.0-2022-07-04/uk/clips/common_voice_uk_20907128.mp3',\n",
" 'array': array([ 0.0000000e+00, -3.5002383e-14, 9.4785833e-15, ...,\n",
" -5.0386465e-08, -4.4114326e-08, -1.9402206e-08], dtype=float32),\n",
" 'sampling_rate': 48000}"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"common_voice_train[0][\"audio\"]"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"id": "rrv65aj7G95i"
},
"outputs": [],
"source": [
"common_voice_train = common_voice_train.cast_column(\"audio\", Audio(sampling_rate=16_000))\n",
"common_voice_test = common_voice_test.cast_column(\"audio\", Audio(sampling_rate=16_000))"
]
},
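{
"cell_type": "markdown",
"metadata": {},
"source": [
"Common Voice ships 48 kHz mp3 clips (see `'sampling_rate': 48000` above), while the feature extractor was configured for 16 kHz, so the `audio` column is re-cast to resample on the fly."
]
},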
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"executionInfo": {
"elapsed": 31,
"status": "ok",
"timestamp": 1641588821174,
"user": {
"displayName": "Yurii Paniv",
"photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64",
"userId": "13095662915325887123"
},
"user_tz": -120
},
"id": "aKtkc1o_HWHC",
"outputId": "55538536-b8c6-484f-d695-5c8e0492747a"
},
"outputs": [
{
"data": {
"text/plain": [
"{'path': '/home/robinhad/.cache/huggingface/datasets/downloads/extracted/ee7155196e5d51620d53e48cf58eb693b7839b8ff183604c8bb948d3e0aad92d/cv-corpus-10.0-2022-07-04/uk/clips/common_voice_uk_20907128.mp3',\n",
" 'array': array([ 1.00456624e-13, -1.54340042e-13, 7.00158518e-13, ...,\n",
" -1.50335762e-08, -1.92623926e-08, -2.21930367e-08], dtype=float32),\n",
" 'sampling_rate': 16000}"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"common_voice_train[0][\"audio\"]"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 80
},
"executionInfo": {
"elapsed": 27,
"status": "ok",
"timestamp": 1641588821175,
"user": {
"displayName": "Yurii Paniv",
"photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64",
"userId": "13095662915325887123"
},
"user_tz": -120
},
"id": "dueM6U7Ev0OA",
"outputId": "8f8e14bf-6d59-43e2-ae2d-525bac8e5097"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"от би була рада\n"
]
},
{
"data": {
"text/html": [
"\n",
" \n",
" "
],
"text/plain": [
""
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import IPython.display as ipd\n",
"import numpy as np\n",
"import random\n",
"\n",
"rand_int = random.randint(0, len(common_voice_train)-1)\n",
"\n",
"print(common_voice_train[rand_int][\"sentence\"])\n",
"ipd.Audio(data=common_voice_train[rand_int][\"audio\"][\"array\"], autoplay=True, rate=16000)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"executionInfo": {
"elapsed": 22,
"status": "ok",
"timestamp": 1641588821176,
"user": {
"displayName": "Yurii Paniv",
"photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64",
"userId": "13095662915325887123"
},
"user_tz": -120
},
"id": "1Po2g7YPuRTx",
"outputId": "ad79ec8a-ab5a-4c52-edfa-a20d0eec2282"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Target text: от їхало якихось із десять на конях назустріч\n",
"Input array shape: (73152,)\n",
"Sampling rate: 16000\n"
]
}
],
"source": [
"rand_int = random.randint(0, len(common_voice_train)-1)\n",
"\n",
"print(\"Target text:\", common_voice_train[rand_int][\"sentence\"])\n",
"print(\"Input array shape:\", common_voice_train[rand_int][\"audio\"][\"array\"].shape)\n",
"print(\"Sampling rate:\", common_voice_train[rand_int][\"audio\"][\"sampling_rate\"])"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"id": "eJY7I0XAwe9p"
},
"outputs": [],
"source": [
"def prepare_dataset(batch):\n",
" audio = batch[\"audio\"]\n",
"\n",
" # batched output is \"un-batched\"\n",
" batch[\"input_values\"] = processor(audio[\"array\"], sampling_rate=audio[\"sampling_rate\"]).input_values[0]\n",
" batch[\"input_length\"] = len(batch[\"input_values\"])\n",
" \n",
" with processor.as_target_processor():\n",
" batch[\"labels\"] = processor(batch[\"sentence\"]).input_ids\n",
" return batch"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 87,
"referenced_widgets": [
"a29f88f174f8499082fbb36a36c47fa4",
"efc3bc0c48124ebeb79d245216eaf0fe",
"d45747150d0b434593a3a7c98399599a",
"ea73f7deb1c643f7b81de7fb7acaaf1b",
"18bc63944343440f837cdff76db004fc",
"9c875952cdd649a5bab87de9bb3f5200",
"aa329cb93df44a6da6012c7cc49d7489",
"b39b6e9131ca4ce3b31e84ceb04e1b83",
"c5eed102ef134a4e8ca41713b82ff6a4",
"e6e50da6516847878309fdc5c463edb3",
"a4ae510b4f3845f891a796cf844fc2bb"
]
},
"executionInfo": {
"elapsed": 107521,
"status": "ok",
"timestamp": 1641588928679,
"user": {
"displayName": "Yurii Paniv",
"photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64",
"userId": "13095662915325887123"
},
"user_tz": -120
},
"id": "-np9xYK-wl8q",
"outputId": "779b4637-0606-4cc8-be3c-16c1c4241e63"
},
"outputs": [
{
"data": {
"application/json": {
"ascii": false,
"bar_format": null,
"colour": null,
"elapsed": 0.00739741325378418,
"initial": 0,
"n": 0,
"ncols": null,
"nrows": null,
"postfix": null,
"prefix": "",
"rate": null,
"total": 11463,
"unit": "ex",
"unit_divisor": 1000,
"unit_scale": false
},
"application/vnd.jupyter.widget-view+json": {
"model_id": "c51a283b0cf149d7a84ade53f6eb40d9",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/11463 [00:00, ?ex/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/json": {
"ascii": false,
"bar_format": null,
"colour": null,
"elapsed": 0.010543107986450195,
"initial": 0,
"n": 0,
"ncols": null,
"nrows": null,
"postfix": null,
"prefix": "",
"rate": null,
"total": 6783,
"unit": "ex",
"unit_divisor": 1000,
"unit_scale": false
},
"application/vnd.jupyter.widget-view+json": {
"model_id": "9666ba72472f47db816037cec309e7ed",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/6783 [00:00, ?ex/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"common_voice_train = common_voice_train.map(prepare_dataset, remove_columns=common_voice_train.column_names)\n",
"common_voice_test = common_voice_test.map(prepare_dataset, remove_columns=common_voice_test.column_names)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"id": "tdHfbUJ_09iA"
},
"outputs": [],
"source": [
"#max_input_length_in_sec = 5.0\n",
"#common_voice_train = common_voice_train.filter(lambda x: x < max_input_length_in_sec * processor.feature_extractor.sampling_rate, input_columns=[\"input_length\"])"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"id": "tborvC9hx88e"
},
"outputs": [],
"source": [
"import torch\n",
"\n",
"from dataclasses import dataclass, field\n",
"from typing import Any, Dict, List, Optional, Union\n",
"\n",
"@dataclass\n",
"class DataCollatorCTCWithPadding:\n",
" \"\"\"\n",
" Data collator that will dynamically pad the inputs received.\n",
" Args:\n",
" processor (:class:`~transformers.Wav2Vec2Processor`)\n",
" The processor used for proccessing the data.\n",
" padding (:obj:`bool`, :obj:`str` or :class:`~transformers.tokenization_utils_base.PaddingStrategy`, `optional`, defaults to :obj:`True`):\n",
" Select a strategy to pad the returned sequences (according to the model's padding side and padding index)\n",
" among:\n",
" * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single\n",
" sequence if provided).\n",
" * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the\n",
" maximum acceptable input length for the model if that argument is not provided.\n",
" * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of\n",
" different lengths).\n",
" \"\"\"\n",
"\n",
" processor: Wav2Vec2Processor\n",
" padding: Union[bool, str] = True\n",
"\n",
" def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:\n",
" # split inputs and labels since they have to be of different lenghts and need\n",
" # different padding methods\n",
" input_features = [{\"input_values\": feature[\"input_values\"]} for feature in features]\n",
" label_features = [{\"input_ids\": feature[\"labels\"]} for feature in features]\n",
"\n",
" batch = self.processor.pad(\n",
" input_features,\n",
" padding=self.padding,\n",
" return_tensors=\"pt\",\n",
" )\n",
" with self.processor.as_target_processor():\n",
" labels_batch = self.processor.pad(\n",
" label_features,\n",
" padding=self.padding,\n",
" return_tensors=\"pt\",\n",
" )\n",
"\n",
" # replace padding with -100 to ignore loss correctly\n",
" labels = labels_batch[\"input_ids\"].masked_fill(labels_batch.attention_mask.ne(1), -100)\n",
"\n",
" batch[\"labels\"] = labels\n",
"\n",
" return batch"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"id": "lbQf5GuZyQ4_"
},
"outputs": [],
"source": [
"data_collator = DataCollatorCTCWithPadding(processor=processor, padding=True)"
]
},
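{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal sketch of what the collator does (hypothetical toy values, not part of the original run): two variable-length examples are padded into one batch, and padded label positions are set to -100 so the CTC loss ignores them."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Toy features in the same shape that map(prepare_dataset) produces.\n",
"toy_features = [\n",
"    {\"input_values\": [0.1, -0.2, 0.3, 0.05], \"labels\": [5, 6, 7]},\n",
"    {\"input_values\": [0.4, 0.1], \"labels\": [8]},\n",
"]\n",
"toy_batch = data_collator(toy_features)\n",
"print(toy_batch[\"input_values\"].shape)  # -> torch.Size([2, 4])\n",
"print(toy_batch[\"labels\"])              # second row padded with -100"
]
},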
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"id": "9Xsux2gmyXso"
},
"outputs": [],
"source": [
"wer_metric = load_metric(\"wer\")\n",
"cer_metric = load_metric(\"cer\")\n",
"metrics = [wer_metric, cer_metric]"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"id": "1XZ-kjweyTy_"
},
"outputs": [],
"source": [
"def compute_metrics(pred):\n",
" pred_logits = pred.predictions\n",
" pred_ids = np.argmax(pred_logits, axis=-1)\n",
"\n",
" pred.label_ids[pred.label_ids == -100] = processor.tokenizer.pad_token_id\n",
"\n",
" pred_str = processor.batch_decode(pred_ids)\n",
" # we do not want to group tokens when computing the metrics\n",
" label_str = processor.batch_decode(pred.label_ids, group_tokens=False)\n",
"\n",
" wer = wer_metric.compute(predictions=pred_str, references=label_str)\n",
" cer = cer_metric.compute(predictions=pred_str, references=label_str)\n",
"\n",
" return {\"wer\": wer, \"cer\": cer}"
]
},
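{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick sanity check of the two metrics on hand-written strings (illustrative, not part of the original run): WER counts word-level edits, CER character-level ones, each normalized by the reference length."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# One of two words is wrong -> WER = 0.5; CER is lower because\n",
"# most characters still match.\n",
"preds = [\"привіт світ\"]\n",
"refs = [\"привіт всесвіт\"]\n",
"print(wer_metric.compute(predictions=preds, references=refs))\n",
"print(cer_metric.compute(predictions=preds, references=refs))"
]
},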
{
"cell_type": "code",
"execution_count": 39,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"executionInfo": {
"elapsed": 9496,
"status": "ok",
"timestamp": 1641588938616,
"user": {
"displayName": "Yurii Paniv",
"photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64",
"userId": "13095662915325887123"
},
"user_tz": -120
},
"id": "e7cqAWIayn6w",
"outputId": "b7b20ce9-e1b2-473f-8032-2a75f98dfa9e"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Some weights of the model checkpoint at facebook/wav2vec2-xls-r-300m were not used when initializing Wav2Vec2ForCTC: ['quantizer.codevectors', 'project_hid.bias', 'project_q.weight', 'quantizer.weight_proj.weight', 'quantizer.weight_proj.bias', 'project_q.bias', 'project_hid.weight']\n",
"- This IS expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
"- This IS NOT expected if you are initializing Wav2Vec2ForCTC from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
"Some weights of Wav2Vec2ForCTC were not initialized from the model checkpoint at facebook/wav2vec2-xls-r-300m and are newly initialized: ['lm_head.bias', 'lm_head.weight']\n",
"You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
]
}
],
"source": [
"from transformers import Wav2Vec2ForCTC\n",
"\n",
"model = Wav2Vec2ForCTC.from_pretrained(\n",
" \"facebook/wav2vec2-xls-r-300m\", \n",
" attention_dropout=0.3,\n",
" hidden_dropout=0.3,\n",
" feat_proj_dropout=0.3,\n",
" mask_time_prob=0.05,\n",
" layerdrop=0.3,\n",
" ctc_loss_reduction=\"mean\", \n",
" pad_token_id=processor.tokenizer.pad_token_id,\n",
" vocab_size=len(processor.tokenizer),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {
"id": "oGI8zObtZ3V0"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/robinhad/Projects/Speech/wav2vec2-xls-r-ukrainian/.venv/lib/python3.9/site-packages/transformers/models/wav2vec2/modeling_wav2vec2.py:1618: FutureWarning: The method `freeze_feature_extractor` is deprecated and will be removed in Transformers v5.Please use the equivalent `freeze_feature_encoder` method instead.\n",
" warnings.warn(\n"
]
}
],
"source": [
"model.freeze_feature_extractor()"
]
},
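{
"cell_type": "markdown",
"metadata": {},
"source": [
"An illustrative check (not part of the original run): after freezing the convolutional feature encoder, only the transformer layers and the freshly initialized `lm_head` keep `requires_grad=True`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Count parameters that will actually receive gradient updates.\n",
"trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)\n",
"total = sum(p.numel() for p in model.parameters())\n",
"print(f\"trainable params: {trainable:,} / {total:,}\")"
]
},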
{
"cell_type": "code",
"execution_count": 41,
"metadata": {
"id": "KbeKSV7uzGPP"
},
"outputs": [],
"source": [
"from transformers import TrainingArguments\n",
"\n",
"training_args = TrainingArguments(\n",
" output_dir=repo_name,\n",
" group_by_length=True,\n",
" per_device_train_batch_size=16,\n",
" gradient_accumulation_steps=6,\n",
" eval_accumulation_steps=4,\n",
" evaluation_strategy=\"steps\",\n",
" num_train_epochs=100,\n",
" gradient_checkpointing=True,\n",
" fp16=True,\n",
" save_steps=400,\n",
" eval_steps=400,\n",
" logging_steps=400,\n",
" learning_rate=3e-4,\n",
" warmup_steps=500,\n",
" save_total_limit=2,\n",
" report_to=\"tensorboard\"\n",
")"
]
},
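{
"cell_type": "markdown",
"metadata": {},
"source": [
"With `per_device_train_batch_size=16` and `gradient_accumulation_steps=6` on a single GPU, the effective batch size is 16 × 6 = 96, which matches the \"Total train batch size ... = 96\" line in the training log below."
]
},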
{
"cell_type": "code",
"execution_count": 43,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"executionInfo": {
"elapsed": 11063,
"status": "ok",
"timestamp": 1641588949674,
"user": {
"displayName": "Yurii Paniv",
"photoUrl": "https://lh3.googleusercontent.com/a/default-user=s64",
"userId": "13095662915325887123"
},
"user_tz": -120
},
"id": "rY7vBmFCPFgC",
"outputId": "2e89d5ea-5b25-44bf-8492-a6220b0b1c38"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Using cuda_amp half precision backend\n"
]
}
],
"source": [
"from transformers import Trainer\n",
"\n",
"trainer = Trainer(\n",
" model=model,\n",
" data_collator=data_collator,\n",
" args=training_args,\n",
" compute_metrics=compute_metrics,\n",
" train_dataset=common_voice_train,\n",
" eval_dataset=common_voice_test,\n",
" tokenizer=processor.feature_extractor,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 409
},
"id": "9fRr9TG5pGBl",
"outputId": "c2a7c797-326c-4bd2-b167-9d2f41d77def"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"The following columns in the training set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
"/home/robinhad/Projects/Speech/wav2vec2-xls-r-ukrainian/.venv/lib/python3.9/site-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
" warnings.warn(\n",
"***** Running training *****\n",
" Num examples = 11463\n",
" Num Epochs = 100\n",
" Instantaneous batch size per device = 16\n",
" Total train batch size (w. parallel, distributed & accumulation) = 96\n",
" Gradient Accumulation steps = 6\n",
" Total optimization steps = 11900\n"
]
},
{
"data": {
"application/json": {
"ascii": false,
"bar_format": null,
"colour": null,
"elapsed": 0.007272958755493164,
"initial": 0,
"n": 0,
"ncols": null,
"nrows": null,
"postfix": null,
"prefix": "",
"rate": null,
"total": 11900,
"unit": "it",
"unit_divisor": 1000,
"unit_scale": false
},
"application/vnd.jupyter.widget-view+json": {
"model_id": "deebda57b25f4f95b4915d7a8d479a62",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/11900 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
"***** Running Evaluation *****\n",
" Num examples = 6783\n",
" Batch size = 8\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'loss': 5.4923, 'learning_rate': 0.0002388, 'epoch': 3.36}\n"
]
},
{
"data": {
"application/json": {
"ascii": false,
"bar_format": null,
"colour": null,
"elapsed": 0.0072269439697265625,
"initial": 0,
"n": 0,
"ncols": null,
"nrows": null,
"postfix": null,
"prefix": "",
"rate": null,
"total": 848,
"unit": "it",
"unit_divisor": 1000,
"unit_scale": false
},
"application/vnd.jupyter.widget-view+json": {
"model_id": "19ecc3e6ca434f5789950aeaad863a08",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/848 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-400\n",
"Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-400/config.json\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 3.336439609527588, 'eval_wer': 1.0, 'eval_cer': 1.0, 'eval_runtime': 211.1144, 'eval_samples_per_second': 32.13, 'eval_steps_per_second': 4.017, 'epoch': 3.36}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-400/pytorch_model.bin\n",
"Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-400/preprocessor_config.json\n",
"Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-6800] due to args.save_total_limit\n",
"The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
"***** Running Evaluation *****\n",
" Num examples = 6783\n",
" Batch size = 8\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'loss': 3.3451, 'learning_rate': 0.00029215789473684205, 'epoch': 6.72}\n"
]
},
{
"data": {
"application/json": {
"ascii": false,
"bar_format": null,
"colour": null,
"elapsed": 0.007369041442871094,
"initial": 0,
"n": 0,
"ncols": null,
"nrows": null,
"postfix": null,
"prefix": "",
"rate": null,
"total": 848,
"unit": "it",
"unit_divisor": 1000,
"unit_scale": false
},
"application/vnd.jupyter.widget-view+json": {
"model_id": "775a551a1cec442a965bada0af3b83e6",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/848 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-800\n",
"Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-800/config.json\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 3.3383235931396484, 'eval_wer': 1.0, 'eval_cer': 1.0, 'eval_runtime': 211.2077, 'eval_samples_per_second': 32.115, 'eval_steps_per_second': 4.015, 'epoch': 6.72}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-800/pytorch_model.bin\n",
"Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-800/preprocessor_config.json\n",
"Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-7200] due to args.save_total_limit\n",
"The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
"***** Running Evaluation *****\n",
" Num examples = 6783\n",
" Batch size = 8\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'loss': 3.329, 'learning_rate': 0.00028163157894736836, 'epoch': 10.08}\n"
]
},
{
"data": {
"application/json": {
"ascii": false,
"bar_format": null,
"colour": null,
"elapsed": 0.007276296615600586,
"initial": 0,
"n": 0,
"ncols": null,
"nrows": null,
"postfix": null,
"prefix": "",
"rate": null,
"total": 848,
"unit": "it",
"unit_divisor": 1000,
"unit_scale": false
},
"application/vnd.jupyter.widget-view+json": {
"model_id": "20b67f8d1d164f4ea9f7668d619e6e7b",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/848 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-1200\n",
"Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-1200/config.json\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 3.3315727710723877, 'eval_wer': 1.0, 'eval_cer': 1.0, 'eval_runtime': 211.0671, 'eval_samples_per_second': 32.137, 'eval_steps_per_second': 4.018, 'epoch': 10.08}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-1200/pytorch_model.bin\n",
"Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-1200/preprocessor_config.json\n",
"Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-400] due to args.save_total_limit\n",
"The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
"***** Running Evaluation *****\n",
" Num examples = 6783\n",
" Batch size = 8\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'loss': 3.1437, 'learning_rate': 0.00027110526315789473, 'epoch': 13.44}\n"
]
},
{
"data": {
"application/json": {
"ascii": false,
"bar_format": null,
"colour": null,
"elapsed": 0.006963014602661133,
"initial": 0,
"n": 0,
"ncols": null,
"nrows": null,
"postfix": null,
"prefix": "",
"rate": null,
"total": 848,
"unit": "it",
"unit_divisor": 1000,
"unit_scale": false
},
"application/vnd.jupyter.widget-view+json": {
"model_id": "70c7241390304e4888951e93cdc4ca41",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/848 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-1600\n",
"Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-1600/config.json\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 2.463456392288208, 'eval_wer': 0.9999594238182187, 'eval_cer': 0.9103366773973774, 'eval_runtime': 208.2248, 'eval_samples_per_second': 32.575, 'eval_steps_per_second': 4.073, 'epoch': 13.44}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-1600/pytorch_model.bin\n",
"Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-1600/preprocessor_config.json\n",
"Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-800] due to args.save_total_limit\n",
"The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
"***** Running Evaluation *****\n",
" Num examples = 6783\n",
" Batch size = 8\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'loss': 1.5745, 'learning_rate': 0.00026057894736842104, 'epoch': 16.8}\n"
]
},
{
"data": {
"application/json": {
"ascii": false,
"bar_format": null,
"colour": null,
"elapsed": 0.0073626041412353516,
"initial": 0,
"n": 0,
"ncols": null,
"nrows": null,
"postfix": null,
"prefix": "",
"rate": null,
"total": 848,
"unit": "it",
"unit_divisor": 1000,
"unit_scale": false
},
"application/vnd.jupyter.widget-view+json": {
"model_id": "a78f719c4e7c4b1d974da356c2390432",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/848 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-2000\n",
"Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-2000/config.json\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 1.1160292625427246, 'eval_wer': 0.9768512882937715, 'eval_cer': 0.36920856433421695, 'eval_runtime': 211.5684, 'eval_samples_per_second': 32.061, 'eval_steps_per_second': 4.008, 'epoch': 16.8}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-2000/pytorch_model.bin\n",
"Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-2000/preprocessor_config.json\n",
"Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-1200] due to args.save_total_limit\n",
"The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
"***** Running Evaluation *****\n",
" Num examples = 6783\n",
" Batch size = 8\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'loss': 1.0611, 'learning_rate': 0.00025005263157894735, 'epoch': 20.17}\n"
]
},
{
"data": {
"application/json": {
"ascii": false,
"bar_format": null,
"colour": null,
"elapsed": 0.0070798397064208984,
"initial": 0,
"n": 0,
"ncols": null,
"nrows": null,
"postfix": null,
"prefix": "",
"rate": null,
"total": 848,
"unit": "it",
"unit_divisor": 1000,
"unit_scale": false
},
"application/vnd.jupyter.widget-view+json": {
"model_id": "b30e2ef5c27c4c71ad0d7027bfef821d",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/848 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-2400\n",
"Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-2400/config.json\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 0.9386810064315796, 'eval_wer': 0.891316697098803, 'eval_cer': 0.28140508810053505, 'eval_runtime': 210.3288, 'eval_samples_per_second': 32.25, 'eval_steps_per_second': 4.032, 'epoch': 20.17}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-2400/pytorch_model.bin\n",
"Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-2400/preprocessor_config.json\n",
"Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-1600] due to args.save_total_limit\n",
"The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
"***** Running Evaluation *****\n",
" Num examples = 6783\n",
" Batch size = 8\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'loss': 0.8741, 'learning_rate': 0.00023952631578947364, 'epoch': 23.53}\n"
]
},
{
"data": {
"application/json": {
"ascii": false,
"bar_format": null,
"colour": null,
"elapsed": 0.0073435306549072266,
"initial": 0,
"n": 0,
"ncols": null,
"nrows": null,
"postfix": null,
"prefix": "",
"rate": null,
"total": 848,
"unit": "it",
"unit_divisor": 1000,
"unit_scale": false
},
"application/vnd.jupyter.widget-view+json": {
"model_id": "b340f47813b74d458a3e18fb15da89be",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/848 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-2800\n",
"Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-2800/config.json\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 0.9212636351585388, 'eval_wer': 0.8704402515723271, 'eval_cer': 0.26904808800689695, 'eval_runtime': 212.3022, 'eval_samples_per_second': 31.95, 'eval_steps_per_second': 3.994, 'epoch': 23.53}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-2800/pytorch_model.bin\n",
"Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-2800/preprocessor_config.json\n",
"Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-2000] due to args.save_total_limit\n",
"The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
"***** Running Evaluation *****\n",
" Num examples = 6783\n",
" Batch size = 8\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'loss': 0.7685, 'learning_rate': 0.00022899999999999998, 'epoch': 26.89}\n"
]
},
{
"data": {
"application/json": {
"ascii": false,
"bar_format": null,
"colour": null,
"elapsed": 0.007165431976318359,
"initial": 0,
"n": 0,
"ncols": null,
"nrows": null,
"postfix": null,
"prefix": "",
"rate": null,
"total": 848,
"unit": "it",
"unit_divisor": 1000,
"unit_scale": false
},
"application/vnd.jupyter.widget-view+json": {
"model_id": "a190b8f6eb8e434ea38065c7331ca229",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/848 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-3200\n",
"Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-3200/config.json\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 0.8420816659927368, 'eval_wer': 0.8330898762426455, 'eval_cer': 0.24242257905154294, 'eval_runtime': 208.9367, 'eval_samples_per_second': 32.464, 'eval_steps_per_second': 4.059, 'epoch': 26.89}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-3200/pytorch_model.bin\n",
"Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-3200/preprocessor_config.json\n",
"Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-2400] due to args.save_total_limit\n",
"The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
"***** Running Evaluation *****\n",
" Num examples = 6783\n",
" Batch size = 8\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'loss': 0.6913, 'learning_rate': 0.0002184736842105263, 'epoch': 30.25}\n"
]
},
{
"data": {
"application/json": {
"ascii": false,
"bar_format": null,
"colour": null,
"elapsed": 0.007306575775146484,
"initial": 0,
"n": 0,
"ncols": null,
"nrows": null,
"postfix": null,
"prefix": "",
"rate": null,
"total": 848,
"unit": "it",
"unit_divisor": 1000,
"unit_scale": false
},
"application/vnd.jupyter.widget-view+json": {
"model_id": "99146f2067db4c7e80bc098c573138de",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/848 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-3600\n",
"Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-3600/config.json\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 0.8760627508163452, 'eval_wer': 0.80908906471901, 'eval_cer': 0.23088894844415456, 'eval_runtime': 211.4392, 'eval_samples_per_second': 32.08, 'eval_steps_per_second': 4.011, 'epoch': 30.25}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-3600/pytorch_model.bin\n",
"Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-3600/preprocessor_config.json\n",
"Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-2800] due to args.save_total_limit\n",
"The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
"***** Running Evaluation *****\n",
" Num examples = 6783\n",
" Batch size = 8\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'loss': 0.6387, 'learning_rate': 0.0002079473684210526, 'epoch': 33.61}\n"
]
},
{
"data": {
"application/json": {
"ascii": false,
"bar_format": null,
"colour": null,
"elapsed": 0.007359981536865234,
"initial": 0,
"n": 0,
"ncols": null,
"nrows": null,
"postfix": null,
"prefix": "",
"rate": null,
"total": 848,
"unit": "it",
"unit_divisor": 1000,
"unit_scale": false
},
"application/vnd.jupyter.widget-view+json": {
"model_id": "5cdc1f646ea4449c884719317058ad6d",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/848 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-4000\n",
"Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-4000/config.json\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 0.7959503531455994, 'eval_wer': 0.7825725299249341, 'eval_cer': 0.21715966587343358, 'eval_runtime': 212.6087, 'eval_samples_per_second': 31.904, 'eval_steps_per_second': 3.989, 'epoch': 33.61}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-4000/pytorch_model.bin\n",
"Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-4000/preprocessor_config.json\n",
"Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-3200] due to args.save_total_limit\n",
"The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
"***** Running Evaluation *****\n",
" Num examples = 6783\n",
" Batch size = 8\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'loss': 0.5837, 'learning_rate': 0.00019742105263157892, 'epoch': 36.97}\n"
]
},
{
"data": {
"application/json": {
"ascii": false,
"bar_format": null,
"colour": null,
"elapsed": 0.007372379302978516,
"initial": 0,
"n": 0,
"ncols": null,
"nrows": null,
"postfix": null,
"prefix": "",
"rate": null,
"total": 848,
"unit": "it",
"unit_divisor": 1000,
"unit_scale": false
},
"application/vnd.jupyter.widget-view+json": {
"model_id": "6a1d3419c50d409c9bc2b6b239bda3b5",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/848 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-4400\n",
"Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-4400/config.json\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 0.8115484714508057, 'eval_wer': 0.7656725502130249, 'eval_cer': 0.2103047112879113, 'eval_runtime': 213.1592, 'eval_samples_per_second': 31.821, 'eval_steps_per_second': 3.978, 'epoch': 36.97}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-4400/pytorch_model.bin\n",
"Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-4400/preprocessor_config.json\n",
"Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-3600] due to args.save_total_limit\n",
"The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
"***** Running Evaluation *****\n",
" Num examples = 6783\n",
" Batch size = 8\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'loss': 0.5456, 'learning_rate': 0.00018689473684210524, 'epoch': 40.33}\n"
]
},
{
"data": {
"application/json": {
"ascii": false,
"bar_format": null,
"colour": null,
"elapsed": 0.007362842559814453,
"initial": 0,
"n": 0,
"ncols": null,
"nrows": null,
"postfix": null,
"prefix": "",
"rate": null,
"total": 848,
"unit": "it",
"unit_divisor": 1000,
"unit_scale": false
},
"application/vnd.jupyter.widget-view+json": {
"model_id": "9844a70e8695435094b62a58a5301f28",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/848 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-4800\n",
"Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-4800/config.json\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 0.8416911959648132, 'eval_wer': 0.7579833637654697, 'eval_cer': 0.21142836846914626, 'eval_runtime': 212.6331, 'eval_samples_per_second': 31.9, 'eval_steps_per_second': 3.988, 'epoch': 40.33}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-4800/pytorch_model.bin\n",
"Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-4800/preprocessor_config.json\n",
"Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-4000] due to args.save_total_limit\n",
"The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
"***** Running Evaluation *****\n",
" Num examples = 6783\n",
" Batch size = 8\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'loss': 0.5179, 'learning_rate': 0.00017636842105263155, 'epoch': 43.69}\n"
]
},
{
"data": {
"application/json": {
"ascii": false,
"bar_format": null,
"colour": null,
"elapsed": 0.006958484649658203,
"initial": 0,
"n": 0,
"ncols": null,
"nrows": null,
"postfix": null,
"prefix": "",
"rate": null,
"total": 848,
"unit": "it",
"unit_divisor": 1000,
"unit_scale": false
},
"application/vnd.jupyter.widget-view+json": {
"model_id": "e5e46905e0f04ab6a6129ba61a1d80ba",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/848 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-5200\n",
"Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-5200/config.json\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 0.8144774436950684, 'eval_wer': 0.7585311422195171, 'eval_cer': 0.20418917479004078, 'eval_runtime': 209.912, 'eval_samples_per_second': 32.314, 'eval_steps_per_second': 4.04, 'epoch': 43.69}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-5200/pytorch_model.bin\n",
"Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-5200/preprocessor_config.json\n",
"Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-4400] due to args.save_total_limit\n",
"The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
"***** Running Evaluation *****\n",
" Num examples = 6783\n",
" Batch size = 8\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'loss': 0.4861, 'learning_rate': 0.00016584210526315787, 'epoch': 47.06}\n"
]
},
{
"data": {
"application/json": {
"ascii": false,
"bar_format": null,
"colour": null,
"elapsed": 0.007211446762084961,
"initial": 0,
"n": 0,
"ncols": null,
"nrows": null,
"postfix": null,
"prefix": "",
"rate": null,
"total": 848,
"unit": "it",
"unit_divisor": 1000,
"unit_scale": false
},
"application/vnd.jupyter.widget-view+json": {
"model_id": "eb38365a17bb4efab96f3dd48a145d42",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/848 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-5600\n",
"Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-5600/config.json\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 0.8354366421699524, 'eval_wer': 0.739216879691621, 'eval_cer': 0.20429572848826133, 'eval_runtime': 208.2486, 'eval_samples_per_second': 32.572, 'eval_steps_per_second': 4.072, 'epoch': 47.06}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-5600/pytorch_model.bin\n",
"Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-5600/preprocessor_config.json\n",
"Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-4800] due to args.save_total_limit\n",
"The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
"***** Running Evaluation *****\n",
" Num examples = 6783\n",
" Batch size = 8\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'loss': 0.4641, 'learning_rate': 0.0001553157894736842, 'epoch': 50.42}\n"
]
},
{
"data": {
"application/json": {
"ascii": false,
"bar_format": null,
"colour": null,
"elapsed": 0.00705265998840332,
"initial": 0,
"n": 0,
"ncols": null,
"nrows": null,
"postfix": null,
"prefix": "",
"rate": null,
"total": 848,
"unit": "it",
"unit_divisor": 1000,
"unit_scale": false
},
"application/vnd.jupyter.widget-view+json": {
"model_id": "129886198a2b49e3a62732bef0b76110",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/848 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-6000\n",
"Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-6000/config.json\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 0.8092752695083618, 'eval_wer': 0.7217691215256644, 'eval_cer': 0.19502232784312712, 'eval_runtime': 210.6162, 'eval_samples_per_second': 32.206, 'eval_steps_per_second': 4.026, 'epoch': 50.42}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-6000/pytorch_model.bin\n",
"Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-6000/preprocessor_config.json\n",
"Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-5200] due to args.save_total_limit\n",
"The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
"***** Running Evaluation *****\n",
" Num examples = 6783\n",
" Batch size = 8\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'loss': 0.4368, 'learning_rate': 0.00014478947368421052, 'epoch': 53.78}\n"
]
},
{
"data": {
"application/json": {
"ascii": false,
"bar_format": null,
"colour": null,
"elapsed": 0.007249355316162109,
"initial": 0,
"n": 0,
"ncols": null,
"nrows": null,
"postfix": null,
"prefix": "",
"rate": null,
"total": 848,
"unit": "it",
"unit_divisor": 1000,
"unit_scale": false
},
"application/vnd.jupyter.widget-view+json": {
"model_id": "518e42e24b7d479ca5a8fe5247ad01f5",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/848 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-6400\n",
"Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-6400/config.json\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 0.8010208010673523, 'eval_wer': 0.7055589369040374, 'eval_cer': 0.1889358514447713, 'eval_runtime': 211.7021, 'eval_samples_per_second': 32.04, 'eval_steps_per_second': 4.006, 'epoch': 53.78}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-6400/pytorch_model.bin\n",
"Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-6400/preprocessor_config.json\n",
"Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-5600] due to args.save_total_limit\n",
"The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
"***** Running Evaluation *****\n",
" Num examples = 6783\n",
" Batch size = 8\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'loss': 0.4207, 'learning_rate': 0.00013426315789473683, 'epoch': 57.14}\n"
]
},
{
"data": {
"application/json": {
"ascii": false,
"bar_format": null,
"colour": null,
"elapsed": 0.007280111312866211,
"initial": 0,
"n": 0,
"ncols": null,
"nrows": null,
"postfix": null,
"prefix": "",
"rate": null,
"total": 848,
"unit": "it",
"unit_divisor": 1000,
"unit_scale": false
},
"application/vnd.jupyter.widget-view+json": {
"model_id": "310968ba59f84d3899c3fcfc37555a72",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/848 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-6800\n",
"Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-6800/config.json\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 0.8339575529098511, 'eval_wer': 0.7027388922702373, 'eval_cer': 0.18708892067561503, 'eval_runtime': 212.1919, 'eval_samples_per_second': 31.966, 'eval_steps_per_second': 3.996, 'epoch': 57.14}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-6800/pytorch_model.bin\n",
"Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-6800/preprocessor_config.json\n",
"Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-6000] due to args.save_total_limit\n",
"The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
"***** Running Evaluation *****\n",
" Num examples = 6783\n",
" Batch size = 8\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'loss': 0.3972, 'learning_rate': 0.00012376315789473682, 'epoch': 60.5}\n"
]
},
{
"data": {
"application/json": {
"ascii": false,
"bar_format": null,
"colour": null,
"elapsed": 0.0073511600494384766,
"initial": 0,
"n": 0,
"ncols": null,
"nrows": null,
"postfix": null,
"prefix": "",
"rate": null,
"total": 848,
"unit": "it",
"unit_divisor": 1000,
"unit_scale": false
},
"application/vnd.jupyter.widget-view+json": {
"model_id": "70c026d4141f4d41a79bef25278385a9",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/848 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-7200\n",
"Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-7200/config.json\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 0.8393998742103577, 'eval_wer': 0.6940961655508217, 'eval_cer': 0.18495784671120397, 'eval_runtime': 211.3395, 'eval_samples_per_second': 32.095, 'eval_steps_per_second': 4.013, 'epoch': 60.5}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-7200/pytorch_model.bin\n",
"Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-7200/preprocessor_config.json\n",
"Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-6400] due to args.save_total_limit\n",
"The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
"***** Running Evaluation *****\n",
" Num examples = 6783\n",
" Batch size = 8\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'loss': 0.3825, 'learning_rate': 0.00011323684210526315, 'epoch': 63.86}\n"
]
},
{
"data": {
"application/json": {
"ascii": false,
"bar_format": null,
"colour": null,
"elapsed": 0.0074388980865478516,
"initial": 0,
"n": 0,
"ncols": null,
"nrows": null,
"postfix": null,
"prefix": "",
"rate": null,
"total": 848,
"unit": "it",
"unit_divisor": 1000,
"unit_scale": false
},
"application/vnd.jupyter.widget-view+json": {
"model_id": "edc0cd14d08a4236ac788f2f71f3d547",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/848 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-7600\n",
"Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-7600/config.json\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 0.8245963454246521, 'eval_wer': 0.6890647190099412, 'eval_cer': 0.18329173433902804, 'eval_runtime': 212.3966, 'eval_samples_per_second': 31.936, 'eval_steps_per_second': 3.993, 'epoch': 63.86}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-7600/pytorch_model.bin\n",
"Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-7600/preprocessor_config.json\n",
"Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-6800] due to args.save_total_limit\n",
"The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
"***** Running Evaluation *****\n",
" Num examples = 6783\n",
" Batch size = 8\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'loss': 0.3719, 'learning_rate': 0.00010271052631578946, 'epoch': 67.23}\n"
]
},
{
"data": {
"application/json": {
"ascii": false,
"bar_format": null,
"colour": null,
"elapsed": 0.007239818572998047,
"initial": 0,
"n": 0,
"ncols": null,
"nrows": null,
"postfix": null,
"prefix": "",
"rate": null,
"total": 848,
"unit": "it",
"unit_divisor": 1000,
"unit_scale": false
},
"application/vnd.jupyter.widget-view+json": {
"model_id": "8daa262ce82f405fb2f3c6c7c09873ad",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/848 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-8000\n",
"Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-8000/config.json\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 0.8630526065826416, 'eval_wer': 0.6835463582876852, 'eval_cer': 0.17931372960546071, 'eval_runtime': 209.1605, 'eval_samples_per_second': 32.43, 'eval_steps_per_second': 4.054, 'epoch': 67.23}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-8000/pytorch_model.bin\n",
"Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-8000/preprocessor_config.json\n",
"Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-7200] due to args.save_total_limit\n",
"The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
"***** Running Evaluation *****\n",
" Num examples = 6783\n",
" Batch size = 8\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'loss': 0.3601, 'learning_rate': 9.218421052631579e-05, 'epoch': 70.59}\n"
]
},
{
"data": {
"application/json": {
"ascii": false,
"bar_format": null,
"colour": null,
"elapsed": 0.006955146789550781,
"initial": 0,
"n": 0,
"ncols": null,
"nrows": null,
"postfix": null,
"prefix": "",
"rate": null,
"total": 848,
"unit": "it",
"unit_divisor": 1000,
"unit_scale": false
},
"application/vnd.jupyter.widget-view+json": {
"model_id": "da7b7cf93dab401c8db40d62be638e02",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/848 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-8400\n",
"Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-8400/config.json\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 0.8463570475578308, 'eval_wer': 0.6926354230066951, 'eval_cer': 0.18325944533956726, 'eval_runtime': 208.5926, 'eval_samples_per_second': 32.518, 'eval_steps_per_second': 4.065, 'epoch': 70.59}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-8400/pytorch_model.bin\n",
"Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-8400/preprocessor_config.json\n",
"Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-7600] due to args.save_total_limit\n",
"The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
"***** Running Evaluation *****\n",
" Num examples = 6783\n",
" Batch size = 8\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'loss': 0.3468, 'learning_rate': 8.16578947368421e-05, 'epoch': 73.95}\n"
]
},
{
"data": {
"application/json": {
"ascii": false,
"bar_format": null,
"colour": null,
"elapsed": 0.007674217224121094,
"initial": 0,
"n": 0,
"ncols": null,
"nrows": null,
"postfix": null,
"prefix": "",
"rate": null,
"total": 848,
"unit": "it",
"unit_divisor": 1000,
"unit_scale": false
},
"application/vnd.jupyter.widget-view+json": {
"model_id": "9c1914cd880b4ff1a88cc4907460108f",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/848 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-8800\n",
"Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-8800/config.json\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 0.8564673662185669, 'eval_wer': 0.6686751876648407, 'eval_cer': 0.17537124277130026, 'eval_runtime': 221.3871, 'eval_samples_per_second': 30.639, 'eval_steps_per_second': 3.83, 'epoch': 73.95}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-8800/pytorch_model.bin\n",
"Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-8800/preprocessor_config.json\n",
"Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-8000] due to args.save_total_limit\n",
"The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
"***** Running Evaluation *****\n",
" Num examples = 6783\n",
" Batch size = 8\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'loss': 0.3326, 'learning_rate': 7.113157894736842e-05, 'epoch': 77.31}\n"
]
},
{
"data": {
"application/json": {
"ascii": false,
"bar_format": null,
"colour": null,
"elapsed": 0.007187604904174805,
"initial": 0,
"n": 0,
"ncols": null,
"nrows": null,
"postfix": null,
"prefix": "",
"rate": null,
"total": 848,
"unit": "it",
"unit_divisor": 1000,
"unit_scale": false
},
"application/vnd.jupyter.widget-view+json": {
"model_id": "a0d41f2cff1744aba4aa9d61f2b141c1",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/848 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-9200\n",
"Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-9200/config.json\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 0.8846502900123596, 'eval_wer': 0.665672550213025, 'eval_cer': 0.1759911915609471, 'eval_runtime': 209.6762, 'eval_samples_per_second': 32.35, 'eval_steps_per_second': 4.044, 'epoch': 77.31}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-9200/pytorch_model.bin\n",
"Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-9200/preprocessor_config.json\n",
"Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-8400] due to args.save_total_limit\n",
"The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
"***** Running Evaluation *****\n",
" Num examples = 6783\n",
" Batch size = 8\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'loss': 0.3272, 'learning_rate': 6.0605263157894733e-05, 'epoch': 80.67}\n"
]
},
{
"data": {
"application/json": {
"ascii": false,
"bar_format": null,
"colour": null,
"elapsed": 0.007404804229736328,
"initial": 0,
"n": 0,
"ncols": null,
"nrows": null,
"postfix": null,
"prefix": "",
"rate": null,
"total": 848,
"unit": "it",
"unit_divisor": 1000,
"unit_scale": false
},
"application/vnd.jupyter.widget-view+json": {
"model_id": "0fe22fe7b78943afa6eaab5153dde314",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/848 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-9600\n",
"Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-9600/config.json\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 0.8954363465309143, 'eval_wer': 0.6707242848447961, 'eval_cer': 0.17582328876375108, 'eval_runtime': 211.264, 'eval_samples_per_second': 32.107, 'eval_steps_per_second': 4.014, 'epoch': 80.67}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-9600/pytorch_model.bin\n",
"Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-9600/preprocessor_config.json\n",
"Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-8800] due to args.save_total_limit\n",
"The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
"***** Running Evaluation *****\n",
" Num examples = 6783\n",
" Batch size = 8\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'loss': 0.3213, 'learning_rate': 5.007894736842105e-05, 'epoch': 84.03}\n"
]
},
{
"data": {
"application/json": {
"ascii": false,
"bar_format": null,
"colour": null,
"elapsed": 0.006964445114135742,
"initial": 0,
"n": 0,
"ncols": null,
"nrows": null,
"postfix": null,
"prefix": "",
"rate": null,
"total": 848,
"unit": "it",
"unit_divisor": 1000,
"unit_scale": false
},
"application/vnd.jupyter.widget-view+json": {
"model_id": "65019eafe06d4894a31fd4b0c6503218",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/848 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-10000\n",
"Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-10000/config.json\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 0.8902531266212463, 'eval_wer': 0.6653885169405559, 'eval_cer': 0.17506772617636898, 'eval_runtime': 209.0979, 'eval_samples_per_second': 32.439, 'eval_steps_per_second': 4.056, 'epoch': 84.03}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-10000/pytorch_model.bin\n",
"Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-10000/preprocessor_config.json\n",
"Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-9200] due to args.save_total_limit\n",
"The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
"***** Running Evaluation *****\n",
" Num examples = 6783\n",
" Batch size = 8\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'loss': 0.3125, 'learning_rate': 3.955263157894737e-05, 'epoch': 87.39}\n"
]
},
{
"data": {
"application/json": {
"ascii": false,
"bar_format": null,
"colour": null,
"elapsed": 0.007245779037475586,
"initial": 0,
"n": 0,
"ncols": null,
"nrows": null,
"postfix": null,
"prefix": "",
"rate": null,
"total": 848,
"unit": "it",
"unit_divisor": 1000,
"unit_scale": false
},
"application/vnd.jupyter.widget-view+json": {
"model_id": "f23bf08feef0414e844f231b57c88c3b",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/848 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-10400\n",
"Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-10400/config.json\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 0.8979071974754333, 'eval_wer': 0.6685940353012781, 'eval_cer': 0.17526791797302577, 'eval_runtime': 209.5385, 'eval_samples_per_second': 32.371, 'eval_steps_per_second': 4.047, 'epoch': 87.39}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-10400/pytorch_model.bin\n",
"Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-10400/preprocessor_config.json\n",
"Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-9600] due to args.save_total_limit\n",
"The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
"***** Running Evaluation *****\n",
" Num examples = 6783\n",
" Batch size = 8\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'loss': 0.3095, 'learning_rate': 2.9026315789473685e-05, 'epoch': 90.75}\n"
]
},
{
"data": {
"application/json": {
"ascii": false,
"bar_format": null,
"colour": null,
"elapsed": 0.007394552230834961,
"initial": 0,
"n": 0,
"ncols": null,
"nrows": null,
"postfix": null,
"prefix": "",
"rate": null,
"total": 848,
"unit": "it",
"unit_divisor": 1000,
"unit_scale": false
},
"application/vnd.jupyter.widget-view+json": {
"model_id": "3e53e86fd8a242a2b8ff424210dde90d",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/848 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-10800\n",
"Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-10800/config.json\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 0.9134525060653687, 'eval_wer': 0.6639886386691012, 'eval_cer': 0.17337578260462444, 'eval_runtime': 214.8656, 'eval_samples_per_second': 31.569, 'eval_steps_per_second': 3.947, 'epoch': 90.75}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-10800/pytorch_model.bin\n",
"Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-10800/preprocessor_config.json\n",
"Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-10000] due to args.save_total_limit\n",
"The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
"***** Running Evaluation *****\n",
" Num examples = 6783\n",
" Batch size = 8\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'loss': 0.3009, 'learning_rate': 1.8526315789473684e-05, 'epoch': 94.12}\n"
]
},
{
"data": {
"application/json": {
"ascii": false,
"bar_format": null,
"colour": null,
"elapsed": 0.007333517074584961,
"initial": 0,
"n": 0,
"ncols": null,
"nrows": null,
"postfix": null,
"prefix": "",
"rate": null,
"total": 848,
"unit": "it",
"unit_divisor": 1000,
"unit_scale": false
},
"application/vnd.jupyter.widget-view+json": {
"model_id": "4eee8fdf4ce940c6bf475a70a09d87c0",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/848 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-11200\n",
"Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-11200/config.json\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 0.9179993867874146, 'eval_wer': 0.6621829985798336, 'eval_cer': 0.1734791074028989, 'eval_runtime': 209.042, 'eval_samples_per_second': 32.448, 'eval_steps_per_second': 4.057, 'epoch': 94.12}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-11200/pytorch_model.bin\n",
"Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-11200/preprocessor_config.json\n",
"Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-10400] due to args.save_total_limit\n",
"The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length. If input_length are not expected by `Wav2Vec2ForCTC.forward`, you can safely ignore this message.\n",
"***** Running Evaluation *****\n",
" Num examples = 6783\n",
" Batch size = 8\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'loss': 0.2967, 'learning_rate': 8e-06, 'epoch': 97.48}\n"
]
},
{
"data": {
"application/json": {
"ascii": false,
"bar_format": null,
"colour": null,
"elapsed": 0.007021665573120117,
"initial": 0,
"n": 0,
"ncols": null,
"nrows": null,
"postfix": null,
"prefix": "",
"rate": null,
"total": 848,
"unit": "it",
"unit_divisor": 1000,
"unit_scale": false
},
"application/vnd.jupyter.widget-view+json": {
"model_id": "0dfbd82a81e24bec865be87a87258dfd",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/848 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Saving model checkpoint to wav2vec2-xls-r-300m-uk/checkpoint-11600\n",
"Configuration saved in wav2vec2-xls-r-300m-uk/checkpoint-11600/config.json\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'eval_loss': 0.9188246130943298, 'eval_wer': 0.660722256035707, 'eval_cer': 0.17280426731416873, 'eval_runtime': 211.3614, 'eval_samples_per_second': 32.092, 'eval_steps_per_second': 4.012, 'epoch': 97.48}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Model weights saved in wav2vec2-xls-r-300m-uk/checkpoint-11600/pytorch_model.bin\n",
"Feature extractor saved in wav2vec2-xls-r-300m-uk/checkpoint-11600/preprocessor_config.json\n",
"Deleting older checkpoint [wav2vec2-xls-r-300m-uk/checkpoint-10800] due to args.save_total_limit\n",
"\n",
"\n",
"Training completed. Do not forget to share your model on huggingface.co/models =)\n",
"\n",
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'train_runtime': 40015.4, 'train_samples_per_second': 28.646, 'train_steps_per_second': 0.297, 'train_loss': 0.9631941716811236, 'epoch': 100.0}\n"
]
},
{
"data": {
"text/plain": [
"TrainOutput(global_step=11900, training_loss=0.9631941716811236, metrics={'train_runtime': 40015.4, 'train_samples_per_second': 28.646, 'train_steps_per_second': 0.297, 'train_loss': 0.9631941716811236, 'epoch': 100.0})"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"trainer.train()"
]
},
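{
"cell_type": "markdown",
"metadata": {},
"source": [
"Over the logged span above, eval WER falls from roughly 0.83 at epoch 27 to about 0.66 by epoch 97, and eval CER from roughly 0.24 to about 0.17, while eval loss plateaus around 0.8–0.9. In total the run covers 11900 steps over 100 epochs in about 11.1 hours, with an average training loss of 0.963. If the `trainer` object is still in memory, the same trajectory can be read back programmatically; a minimal sketch, assuming the standard `Trainer.state.log_history` field:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: list the evaluation metrics collected during training, instead of\n",
"# scraping them out of the streamed log above.\n",
"eval_logs = [log for log in trainer.state.log_history if \"eval_wer\" in log]\n",
"for log in eval_logs:\n",
"    print(f\"epoch {log['epoch']:6.2f}  WER {log['eval_wer']:.4f}  CER {log['eval_cer']:.4f}\")"
]
},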
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Dropping the following result as it does not have all the necessary fields:\n",
"{'dataset': {'name': 'common_voice', 'type': 'common_voice', 'args': 'uk'}}\n"
]
}
],
"source": [
"trainer.create_model_card()"
]
},
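{
"cell_type": "markdown",
"metadata": {},
"source": [
"The \"Dropping the following result\" warning above is benign: the result entry describes only the dataset and presumably lacks the metric fields `create_model_card` expects for complete card metadata, so that entry is skipped while the model card itself is still written."
]
},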
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "R351I9IQp_9D"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"loading configuration file wav2vec2-xls-r-300m-uk/checkpoint-5600/config.json\n",
"Model config Wav2Vec2Config {\n",
" \"_name_or_path\": \"facebook/wav2vec2-xls-r-300m\",\n",
" \"activation_dropout\": 0.0,\n",
" \"adapter_kernel_size\": 3,\n",
" \"adapter_stride\": 2,\n",
" \"add_adapter\": false,\n",
" \"apply_spec_augment\": true,\n",
" \"architectures\": [\n",
" \"Wav2Vec2ForCTC\"\n",
" ],\n",
" \"attention_dropout\": 0.0,\n",
" \"bos_token_id\": 1,\n",
" \"classifier_proj_size\": 256,\n",
" \"codevector_dim\": 768,\n",
" \"contrastive_logits_temperature\": 0.1,\n",
" \"conv_bias\": true,\n",
" \"conv_dim\": [\n",
" 512,\n",
" 512,\n",
" 512,\n",
" 512,\n",
" 512,\n",
" 512,\n",
" 512\n",
" ],\n",
" \"conv_kernel\": [\n",
" 10,\n",
" 3,\n",
" 3,\n",
" 3,\n",
" 3,\n",
" 2,\n",
" 2\n",
" ],\n",
" \"conv_stride\": [\n",
" 5,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 2,\n",
" 2\n",
" ],\n",
" \"ctc_loss_reduction\": \"mean\",\n",
" \"ctc_zero_infinity\": false,\n",
" \"diversity_loss_weight\": 0.1,\n",
" \"do_stable_layer_norm\": true,\n",
" \"eos_token_id\": 2,\n",
" \"feat_extract_activation\": \"gelu\",\n",
" \"feat_extract_dropout\": 0.0,\n",
" \"feat_extract_norm\": \"layer\",\n",
" \"feat_proj_dropout\": 0.0,\n",
" \"feat_quantizer_dropout\": 0.0,\n",
" \"final_dropout\": 0.0,\n",
" \"gradient_checkpointing\": false,\n",
" \"hidden_act\": \"gelu\",\n",
" \"hidden_dropout\": 0.0,\n",
" \"hidden_size\": 1024,\n",
" \"initializer_range\": 0.02,\n",
" \"intermediate_size\": 4096,\n",
" \"layer_norm_eps\": 1e-05,\n",
" \"layerdrop\": 0.0,\n",
" \"mask_feature_length\": 10,\n",
" \"mask_feature_min_masks\": 0,\n",
" \"mask_feature_prob\": 0.0,\n",
" \"mask_time_length\": 10,\n",
" \"mask_time_min_masks\": 2,\n",
" \"mask_time_prob\": 0.05,\n",
" \"model_type\": \"wav2vec2\",\n",
" \"num_adapter_layers\": 3,\n",
" \"num_attention_heads\": 16,\n",
" \"num_codevector_groups\": 2,\n",
" \"num_codevectors_per_group\": 320,\n",
" \"num_conv_pos_embedding_groups\": 16,\n",
" \"num_conv_pos_embeddings\": 128,\n",
" \"num_feat_extract_layers\": 7,\n",
" \"num_hidden_layers\": 24,\n",
" \"num_negatives\": 100,\n",
" \"output_hidden_size\": 1024,\n",
" \"pad_token_id\": 37,\n",
" \"proj_codevector_dim\": 768,\n",
" \"torch_dtype\": \"float32\",\n",
" \"transformers_version\": \"4.14.1\",\n",
" \"use_weighted_layer_sum\": false,\n",
" \"vocab_size\": 40\n",
"}\n",
"\n",
"loading weights file wav2vec2-xls-r-300m-uk/checkpoint-5600/pytorch_model.bin\n",
"All model checkpoint weights were used when initializing Wav2Vec2ForCTC.\n",
"\n",
"All the weights of Wav2Vec2ForCTC were initialized from the model checkpoint at wav2vec2-xls-r-300m-uk/checkpoint-5600.\n",
"If your task is similar to the task the model of the checkpoint was trained on, you can already use Wav2Vec2ForCTC for predictions without further training.\n",
"loading feature extractor configuration file wav2vec2-xls-r-300m-uk/preprocessor_config.json\n",
"Feature extractor Wav2Vec2FeatureExtractor {\n",
" \"do_normalize\": true,\n",
" \"feature_extractor_type\": \"Wav2Vec2FeatureExtractor\",\n",
" \"feature_size\": 1,\n",
" \"padding_side\": \"right\",\n",
" \"padding_value\": 0.0,\n",
" \"return_attention_mask\": true,\n",
" \"sampling_rate\": 16000\n",
"}\n",
"\n",
"Didn't find file wav2vec2-xls-r-300m-uk/tokenizer.json. We won't load it.\n",
"loading file wav2vec2-xls-r-300m-uk/vocab.json\n",
"loading file wav2vec2-xls-r-300m-uk/tokenizer_config.json\n",
"loading file wav2vec2-xls-r-300m-uk/added_tokens.json\n",
"loading file wav2vec2-xls-r-300m-uk/special_tokens_map.json\n",
"loading file None\n",
"Adding to the vocabulary\n",
"Adding to the vocabulary\n"
]
}
],
"source": [
"from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor\n",
"model = Wav2Vec2ForCTC.from_pretrained(repo_name + \"/checkpoint-11200\").to(\"cuda\")\n",
"processor = Wav2Vec2Processor.from_pretrained(repo_name)"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "jD7TZ1YS3S_K"
},
"source": [
"\n",
"Now, we will just take the first example of the test set, run it through the model and take the `argmax(...)` of the logits to retrieve the predicted token ids."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "pax07TnL3WZn"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"It is strongly recommended to pass the ``sampling_rate`` argument to this function. Failing to do so can result in silent errors that might be hard to debug.\n"
]
}
],
"source": [
"audio_id = 10\n",
"\n",
"input_dict = processor(common_voice_test[\"input_values\"], return_tensors=\"pt\", padding=True)\n",
"\n",
"logits = model(input_dict.input_values.to(\"cuda\")).logits\n",
"\n",
"pred_ids = torch.argmax(logits, dim=-1)[audio_id]\n",
"\n",
"common_voice_test_transcription = load_dataset(\"common_voice\", \"uk\", split=\"test\")\n",
"\n",
"print(\"Prediction:\")\n",
"print(processor.decode(pred_ids))\n",
"\n",
"print(\"\\nReference:\")\n",
"print(common_voice_test_transcription[audio_id][\"sentence\"].lower())"
]
}
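,
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To put a number on the comparison above, the same WER metric used during evaluation can be applied to this single example. A minimal sketch, assuming `datasets.load_metric` and its `jiwer` backend are available as they were during training:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: score the greedy prediction against its reference with WER.\n",
"from datasets import load_metric\n",
"\n",
"wer_metric = load_metric(\"wer\")\n",
"prediction = processor.decode(pred_ids)\n",
"reference = common_voice_test_transcription[audio_id][\"sentence\"].lower()\n",
"print(\"WER:\", wer_metric.compute(predictions=[prediction], references=[reference]))"
]
}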
],
"metadata": {
"accelerator": "GPU",
"colab": {
"collapsed_sections": [],
"machine_shape": "hm",
"name": "Копія записника \"Fine-Tune XLS-R on Common Voice.ipynb\"",
"provenance": [
{
"file_id": "https://github.com/patrickvonplaten/notebooks/blob/master/Fine_Tune_XLS_R_on_Common_Voice.ipynb",
"timestamp": 1641583715050
}
]
},
"kernelspec": {
"display_name": "Python 3.9.13 (conda)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
},
"vscode": {
"interpreter": {
"hash": "a5cdd9abf8df3af0fd61fdb3838d6c6f2f66a9ba4bf4484f45cd88abf9f04fe9"
}
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"04ec68b059df4c628839c3ac29e2ebdd": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"05d8496d54174ae298c319b0194fc710": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"116786d9364a4a57b521cddaabeda688": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HBoxModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_a1e2c04dc2cb45ea80bec125e3dbf56f",
"IPY_MODEL_b6d46d40efa14b21814f41531f5a2f41",
"IPY_MODEL_d8bf8dc5d6c84140a4e96c9c435b8f17"
],
"layout": "IPY_MODEL_9baa2f69aa9c4387bf1086a04ed78420"
}
},
"18bc63944343440f837cdff76db004fc": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_a4ae510b4f3845f891a796cf844fc2bb",
"placeholder": "",
"style": "IPY_MODEL_e6e50da6516847878309fdc5c463edb3",
"value": " 6962/6962 [01:46<00:00, 78.15ex/s]"
}
},
"1f3abdf2e0f6459da4179a94d691c4c4": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "FloatProgressModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_c31a747e18df4b4aa4449a30e387448c",
"max": 1,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_414efa8a08cd491cb78af8a95a151daa",
"value": 1
}
},
"22ba979142074f1d976e1a905544fd2d": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"3dedffa30b774426bd474072a3a0d591": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"414efa8a08cd491cb78af8a95a151daa": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "ProgressStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"427056895c674c428400bee0f5b43995": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"445c84e1e2e541f2a54fb989def386ae": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"48c60be3ca9349a295b83f65769c7f27": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_05d8496d54174ae298c319b0194fc710",
"placeholder": "",
"style": "IPY_MODEL_3dedffa30b774426bd474072a3a0d591",
"value": " 1/1 [00:00<00:00, 11.09ba/s]"
}
},
"5815ae1348994bfebba4a8e968489a96": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"5c2a7fea8c434d51ada69a0854b88baf": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"68502fb433564eee8dfdf272ed7e4f56": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_5c2a7fea8c434d51ada69a0854b88baf",
"placeholder": "",
"style": "IPY_MODEL_6c80bd8a8fe14a5989fe27445c14650f",
"value": "100%"
}
},
"6c80bd8a8fe14a5989fe27445c14650f": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"77f1a51099b24831ad8b2be3d2dc833a": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"8b6b7f28751c45c8869aa86eb2a0ab26": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HBoxModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_68502fb433564eee8dfdf272ed7e4f56",
"IPY_MODEL_1f3abdf2e0f6459da4179a94d691c4c4",
"IPY_MODEL_48c60be3ca9349a295b83f65769c7f27"
],
"layout": "IPY_MODEL_445c84e1e2e541f2a54fb989def386ae"
}
},
"9baa2f69aa9c4387bf1086a04ed78420": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"9c875952cdd649a5bab87de9bb3f5200": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"a1e2c04dc2cb45ea80bec125e3dbf56f": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_427056895c674c428400bee0f5b43995",
"placeholder": "",
"style": "IPY_MODEL_04ec68b059df4c628839c3ac29e2ebdd",
"value": "100%"
}
},
"a29f88f174f8499082fbb36a36c47fa4": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HBoxModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HBoxModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HBoxView",
"box_style": "",
"children": [
"IPY_MODEL_d45747150d0b434593a3a7c98399599a",
"IPY_MODEL_ea73f7deb1c643f7b81de7fb7acaaf1b",
"IPY_MODEL_18bc63944343440f837cdff76db004fc"
],
"layout": "IPY_MODEL_efc3bc0c48124ebeb79d245216eaf0fe"
}
},
"a4ae510b4f3845f891a796cf844fc2bb": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"aa329cb93df44a6da6012c7cc49d7489": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"b39b6e9131ca4ce3b31e84ceb04e1b83": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "ProgressStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"b6d46d40efa14b21814f41531f5a2f41": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "FloatProgressModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_77f1a51099b24831ad8b2be3d2dc833a",
"max": 1,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_d518f2c2ab6945b78a6d336dad6262bd",
"value": 1
}
},
"c31a747e18df4b4aa4449a30e387448c": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"c5eed102ef134a4e8ca41713b82ff6a4": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
},
"d45747150d0b434593a3a7c98399599a": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_aa329cb93df44a6da6012c7cc49d7489",
"placeholder": "",
"style": "IPY_MODEL_9c875952cdd649a5bab87de9bb3f5200",
"value": "100%"
}
},
"d518f2c2ab6945b78a6d336dad6262bd": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "ProgressStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "ProgressStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"bar_color": null,
"description_width": ""
}
},
"d8bf8dc5d6c84140a4e96c9c435b8f17": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "HTMLModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "HTMLModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "HTMLView",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_22ba979142074f1d976e1a905544fd2d",
"placeholder": "",
"style": "IPY_MODEL_5815ae1348994bfebba4a8e968489a96",
"value": " 1/1 [00:00<00:00, 7.95ba/s]"
}
},
"e6e50da6516847878309fdc5c463edb3": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "DescriptionStyleModel",
"state": {
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "DescriptionStyleModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "StyleView",
"description_width": ""
}
},
"ea73f7deb1c643f7b81de7fb7acaaf1b": {
"model_module": "@jupyter-widgets/controls",
"model_module_version": "1.5.0",
"model_name": "FloatProgressModel",
"state": {
"_dom_classes": [],
"_model_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"_model_name": "FloatProgressModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/controls",
"_view_module_version": "1.5.0",
"_view_name": "ProgressView",
"bar_style": "success",
"description": "",
"description_tooltip": null,
"layout": "IPY_MODEL_c5eed102ef134a4e8ca41713b82ff6a4",
"max": 6962,
"min": 0,
"orientation": "horizontal",
"style": "IPY_MODEL_b39b6e9131ca4ce3b31e84ceb04e1b83",
"value": 6962
}
},
"efc3bc0c48124ebeb79d245216eaf0fe": {
"model_module": "@jupyter-widgets/base",
"model_module_version": "1.2.0",
"model_name": "LayoutModel",
"state": {
"_model_module": "@jupyter-widgets/base",
"_model_module_version": "1.2.0",
"_model_name": "LayoutModel",
"_view_count": null,
"_view_module": "@jupyter-widgets/base",
"_view_module_version": "1.2.0",
"_view_name": "LayoutView",
"align_content": null,
"align_items": null,
"align_self": null,
"border": null,
"bottom": null,
"display": null,
"flex": null,
"flex_flow": null,
"grid_area": null,
"grid_auto_columns": null,
"grid_auto_flow": null,
"grid_auto_rows": null,
"grid_column": null,
"grid_gap": null,
"grid_row": null,
"grid_template_areas": null,
"grid_template_columns": null,
"grid_template_rows": null,
"height": null,
"justify_content": null,
"justify_items": null,
"left": null,
"margin": null,
"max_height": null,
"max_width": null,
"min_height": null,
"min_width": null,
"object_fit": null,
"object_position": null,
"order": null,
"overflow": null,
"overflow_x": null,
"overflow_y": null,
"padding": null,
"right": null,
"top": null,
"visibility": null,
"width": null
}
}
}
}
},
"nbformat": 4,
"nbformat_minor": 4
}