{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "Install requirements and dependencies, then log in to Hugging Face through the command line." ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "vK6B3ubS21Ky", "outputId": "1fc66279-80cf-4dac-95b8-8791cdbc00d3" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Collecting datasets==1.18.3\n", " Downloading datasets-1.18.3-py3-none-any.whl (311 kB)\n", "\u001b[K |████████████████████████████████| 311 kB 5.1 MB/s \n", "\u001b[?25hCollecting huggingface-hub<1.0.0,>=0.1.0\n", " Downloading huggingface_hub-0.4.0-py3-none-any.whl (67 kB)\n", "\u001b[K |████████████████████████████████| 67 kB 5.1 MB/s \n", "\u001b[?25hCollecting aiohttp\n", " Downloading aiohttp-3.8.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.1 MB)\n", "\u001b[K |████████████████████████████████| 1.1 MB 52.3 MB/s \n", "\u001b[?25hRequirement already satisfied: multiprocess in /usr/local/lib/python3.7/dist-packages (from datasets==1.18.3) (0.70.12.2)\n", "Requirement already satisfied: dill in /usr/local/lib/python3.7/dist-packages (from datasets==1.18.3) (0.3.4)\n", "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.7/dist-packages (from datasets==1.18.3) (4.62.3)\n", "Requirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from datasets==1.18.3) (1.3.5)\n", "Requirement already satisfied: pyarrow!=4.0.0,>=3.0.0 in /usr/local/lib/python3.7/dist-packages (from datasets==1.18.3) (6.0.1)\n", "Collecting fsspec[http]>=2021.05.0\n", " Downloading fsspec-2022.1.0-py3-none-any.whl (133 kB)\n", "\u001b[K |████████████████████████████████| 133 kB 52.1 MB/s \n", "\u001b[?25hRequirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from datasets==1.18.3) (4.10.1)\n", "Requirement already satisfied: packaging in 
/usr/local/lib/python3.7/dist-packages (from datasets==1.18.3) (21.3)\n", "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from datasets==1.18.3) (1.19.5)\n", "Collecting xxhash\n", " Downloading xxhash-2.0.2-cp37-cp37m-manylinux2010_x86_64.whl (243 kB)\n", "\u001b[K |████████████████████████████████| 243 kB 52.3 MB/s \n", "\u001b[?25hRequirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.7/dist-packages (from datasets==1.18.3) (2.23.0)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0.0,>=0.1.0->datasets==1.18.3) (3.10.0.2)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0.0,>=0.1.0->datasets==1.18.3) (3.4.2)\n", "Requirement already satisfied: pyyaml in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0.0,>=0.1.0->datasets==1.18.3) (3.13)\n", "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging->datasets==1.18.3) (3.0.7)\n", "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets==1.18.3) (3.0.4)\n", "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets==1.18.3) (2.10)\n", "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets==1.18.3) (1.24.3)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets==1.18.3) (2021.10.8)\n", "Collecting aiosignal>=1.1.2\n", " Downloading aiosignal-1.2.0-py3-none-any.whl (8.2 kB)\n", "Requirement already satisfied: charset-normalizer<3.0,>=2.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets==1.18.3) (2.0.11)\n", "Collecting asynctest==0.13.0\n", " 
Downloading asynctest-0.13.0-py3-none-any.whl (26 kB)\n", "Collecting frozenlist>=1.1.1\n", " Downloading frozenlist-1.3.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (144 kB)\n", "\u001b[K |████████████████████████████████| 144 kB 60.9 MB/s \n", "\u001b[?25hCollecting yarl<2.0,>=1.0\n", " Downloading yarl-1.7.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (271 kB)\n", "\u001b[K |████████████████████████████████| 271 kB 47.4 MB/s \n", "\u001b[?25hRequirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets==1.18.3) (21.4.0)\n", "Collecting async-timeout<5.0,>=4.0.0a3\n", " Downloading async_timeout-4.0.2-py3-none-any.whl (5.8 kB)\n", "Collecting multidict<7.0,>=4.5\n", " Downloading multidict-6.0.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (94 kB)\n", "\u001b[K |████████████████████████████████| 94 kB 1.8 MB/s \n", "\u001b[?25hRequirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->datasets==1.18.3) (3.7.0)\n", "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas->datasets==1.18.3) (2.8.2)\n", "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/dist-packages (from pandas->datasets==1.18.3) (2018.9)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas->datasets==1.18.3) (1.15.0)\n", "Installing collected packages: multidict, frozenlist, yarl, asynctest, async-timeout, aiosignal, fsspec, aiohttp, xxhash, huggingface-hub, datasets\n", "Successfully installed aiohttp-3.8.1 aiosignal-1.2.0 async-timeout-4.0.2 asynctest-0.13.0 datasets-1.18.3 frozenlist-1.3.0 fsspec-2022.1.0 huggingface-hub-0.4.0 multidict-6.0.2 xxhash-2.0.2 yarl-1.7.2\n", "Collecting transformers\n", " Downloading 
transformers-4.16.2-py3-none-any.whl (3.5 MB)\n", "\u001b[K |████████████████████████████████| 3.5 MB 5.8 MB/s \n", "\u001b[?25hRequirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.7/dist-packages (from transformers) (21.3)\n", "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (1.19.5)\n", "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.7/dist-packages (from transformers) (4.62.3)\n", "Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from transformers) (4.10.1)\n", "Collecting sacremoses\n", " Downloading sacremoses-0.0.47-py2.py3-none-any.whl (895 kB)\n", "\u001b[K |████████████████████████████████| 895 kB 53.6 MB/s \n", "\u001b[?25hRequirement already satisfied: huggingface-hub<1.0,>=0.1.0 in /usr/local/lib/python3.7/dist-packages (from transformers) (0.4.0)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers) (3.4.2)\n", "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (2019.12.20)\n", "Collecting pyyaml>=5.1\n", " Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)\n", "\u001b[K |████████████████████████████████| 596 kB 59.6 MB/s \n", "\u001b[?25hRequirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from transformers) (2.23.0)\n", "Collecting tokenizers!=0.11.3,>=0.10.1\n", " Downloading tokenizers-0.11.4-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.8 MB)\n", "\u001b[K |████████████████████████████████| 6.8 MB 50.5 MB/s \n", "\u001b[?25hRequirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0,>=0.1.0->transformers) (3.10.0.2)\n", "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in 
/usr/local/lib/python3.7/dist-packages (from packaging>=20.0->transformers) (3.0.7)\n", "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->transformers) (3.7.0)\n", "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (1.24.3)\n", "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2.10)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2021.10.8)\n", "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (3.0.4)\n", "Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.15.0)\n", "Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (7.1.2)\n", "Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.1.0)\n", "Installing collected packages: pyyaml, tokenizers, sacremoses, transformers\n", " Attempting uninstall: pyyaml\n", " Found existing installation: PyYAML 3.13\n", " Uninstalling PyYAML-3.13:\n", " Successfully uninstalled PyYAML-3.13\n", "Successfully installed pyyaml-6.0 sacremoses-0.0.47 tokenizers-0.11.4 transformers-4.16.2\n", "Collecting pyctcdecode\n", " Downloading pyctcdecode-0.3.0-py2.py3-none-any.whl (43 kB)\n", "\u001b[K |████████████████████████████████| 43 kB 1.2 MB/s \n", "\u001b[?25hCollecting pygtrie<3.0,>=2.1\n", " Downloading pygtrie-2.4.2.tar.gz (35 kB)\n", "Collecting hypothesis<7,>=6.14\n", " Downloading hypothesis-6.36.1-py3-none-any.whl (376 kB)\n", "\u001b[K |████████████████████████████████| 376 kB 10.3 MB/s \n", "\u001b[?25hRequirement already satisfied: numpy<2.0.0,>=1.15.0 in 
/usr/local/lib/python3.7/dist-packages (from pyctcdecode) (1.19.5)\n", "Requirement already satisfied: attrs>=19.2.0 in /usr/local/lib/python3.7/dist-packages (from hypothesis<7,>=6.14->pyctcdecode) (21.4.0)\n", "Requirement already satisfied: sortedcontainers<3.0.0,>=2.1.0 in /usr/local/lib/python3.7/dist-packages (from hypothesis<7,>=6.14->pyctcdecode) (2.4.0)\n", "Building wheels for collected packages: pygtrie\n", " Building wheel for pygtrie (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Created wheel for pygtrie: filename=pygtrie-2.4.2-py3-none-any.whl size=19063 sha256=8df2d93eca581ee990684593e9aea4702e3fc1600c3b12431977e7f7f2a72d2c\n", " Stored in directory: /root/.cache/pip/wheels/d3/f8/ba/1d828b1603ea422686eb694253a43cb3a5901ea4696c1e0603\n", "Successfully built pygtrie\n", "Installing collected packages: pygtrie, hypothesis, pyctcdecode\n", "Successfully installed hypothesis-6.36.1 pyctcdecode-0.3.0 pygtrie-2.4.2\n", "Collecting jiwer\n", " Downloading jiwer-2.3.0-py3-none-any.whl (15 kB)\n", "Collecting python-Levenshtein==0.12.2\n", " Downloading python-Levenshtein-0.12.2.tar.gz (50 kB)\n", "\u001b[K |████████████████████████████████| 50 kB 3.0 MB/s \n", "\u001b[?25hRequirement already satisfied: setuptools in /usr/local/lib/python3.7/dist-packages (from python-Levenshtein==0.12.2->jiwer) (57.4.0)\n", "Building wheels for collected packages: python-Levenshtein\n", " Building wheel for python-Levenshtein (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", " Created wheel for python-Levenshtein: filename=python_Levenshtein-0.12.2-cp37-cp37m-linux_x86_64.whl size=149862 sha256=9f2040039739ea899365d0e09ec9144de3fa92b4964a8c65af9b5f775f2379a5\n", " Stored in directory: /root/.cache/pip/wheels/05/5f/ca/7c4367734892581bb5ff896f15027a932c551080b2abd3e00d\n", "Successfully built python-Levenshtein\n", "Installing collected packages: python-Levenshtein, jiwer\n", "Successfully installed jiwer-2.3.0 python-Levenshtein-0.12.2\n", "Collecting https://github.com/kpu/kenlm/archive/master.zip\n", " Downloading https://github.com/kpu/kenlm/archive/master.zip (541 kB)\n", "\u001b[K |████████████████████████████████| 541 kB 3.0 MB/s \n", "\u001b[?25hBuilding wheels for collected packages: kenlm\n", " Building wheel for kenlm (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Created wheel for kenlm: filename=kenlm-0.0.0-cp37-cp37m-linux_x86_64.whl size=2336711 sha256=8be5703f0c372f4d8b2a3d9bdb5e5c1511bd7167b8ae6c34ec4eccb3eba52d00\n", " Stored in directory: /tmp/pip-ephem-wheel-cache-b2yxhs9a/wheels/3d/aa/02/7b4a2eab5d7a2a9391bd9680dbad6270808a147bc3b7047e4e\n", "Successfully built kenlm\n", "Installing collected packages: kenlm\n", "Successfully installed kenlm-0.0.0\n", "\n", " _| _| _| _| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _|_|_|_| _|_| _|_|_| _|_|_|_|\n", " _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _|\n", " _|_|_|_| _| _| _| _|_| _| _|_| _| _| _| _| _| _|_| _|_|_| _|_|_|_| _| _|_|_|\n", " _| _| _| _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _|\n", " _| _| _|_| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _| _| _| _|_|_| _|_|_|_|\n", "\n", " To login, `huggingface_hub` now requires a token generated from https://huggingface.co/settings/token.\n", " (Deprecated, will be removed in v0.3.0) To login with username and password instead, interrupt with Ctrl+C.\n", " \n", "Token: \n", "Login successful\n", "Your token has been saved to /root/.huggingface/token\n", "\u001b[1m\u001b[31mAuthenticated through git-credential 
store but this isn't the helper defined on your machine.\n", "You might have to re-authenticate when pushing to the Hugging Face Hub. Run the following command in your terminal in case you want to set this credential helper as the default\n", "\n", "git config --global credential.helper store\u001b[0m\n" ] } ], "source": [ "!pip install datasets==1.18.3\n", "!pip install transformers\n", "!pip install pyctcdecode\n", "!pip install jiwer\n", "!pip install https://github.com/kpu/kenlm/archive/master.zip\n", "!huggingface-cli login" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Preprocess the data and initialize the loss and evaluation functions, adapted from [this GitHub repo](https://github.com/harveenchadha/bol/blob/main/demos/robus/evaluate_model_hf_with_lm.ipynb)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 81, "referenced_widgets": [ "cd9b21068c814aed8fa9c1653f077ce6", "3a0363dd03f34d1383d3583c6c8db403", "24a8bce353cb40028d2ce15f26cb5d4a", "2a79c7470a854128bcb075508dec31cc", "32bb1efb868846ab823f8c97979fc403", "0d99cd9e92d642cbba517e3e42b08a34", "d322bd1f9b544a72a816a54da9f86fb6", "b8124a9bb56b41819242a091136059a8", "1eed3e578be7465884d36ebfa649a2e9", "d411ccf809aa4510b7467fd688ab5d81", "eae4a425b4bc4564ac4ecf7c15421d77", "2e9a8d5a111645ae94fdeddb7dac1697", "ffb6ee6c418741de85927bb630b02484", "cc4af9bf4e7b4d16a554a6b56ab4e18a", "def774a3f0d64d45bf9dc275b932bf1c", "a12aed326d6e43a790c3bcaa0a0da700", "be1fef3458704c0388111b2c0d7a6e9f", "838163d1875c46b48659b029a9d1e742", "ca6facdc672e419ba3e74b5a5d706733", "9a6c4a585d2243c594fe51f80a3e4152", "f0dba9aed06149399de6e00f291b361a", "8dd56196f17f442a9527dea9788757f3" ] }, "id": "wUoMGUOH24Sv", "outputId": "e6c82489-de68-4f31-dd90-dd1a6742eade" }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "cd9b21068c814aed8fa9c1653f077ce6", "version_major": 2, "version_minor": 0 }, 
"text/plain": [ "Downloading: 0%| | 0.00/1.90k [00:00\n", "WER: 0.086074 (8.6074)
\n", "CER: 0.024147 (2.4147)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 650, "referenced_widgets": [ "49d98065c30245e3b083afd2beccf55b", "766582275c41488fbb5bcc294c0c8421", "9ba73eadf4cb41b2ba4e780709561e41", "abf913d53d994642ab16fd00c16b7b22", "c865b7cab92549f980e728aad7ec40ca", "272060fa6a44425d971c66020afc5f44", "c6575b4d28854fdbbfb850cced35d9a4", "1fa3c890723e409388d5c6a90f8a49d8", "c86d723e17bb4eefb2a10a608dabeb2c", "3dc066040afc4b9d8b9523d93c947377", "8546ee9729f748b8b1aa36887eb592bd", "b6dd4e5993ec4d688bff12544cd9141b", "c5b211993fb04dab8845d86599e7096b", "04b73113fa164a46a41e5785f035279d", "344f1a95dc5e46a0b16079bf012fcd3d", "9662d28bf624493bbf26850431614b07", "262481fa1e584f69a14759ae7a07af61", "58b89b48987d4777b399228bae3ec233", "94ebe9ff64bc4616b45db847ac9b5d96", "297ecfc53d3d4b2c8fa53f7d0e710459", "bfe6ae000d484dfd8e16905bdb9f3d9e", "a647f35b4ddd459aa33f83272e928e65", "e3c3e61f69a549e28fc09805f102cba6", "73d869d117cc411e8fbf77652e0b10b5", "7ea87dd2b879446388e3826c54be1792", "82445a8df2dd42ef98bf29caebd47dd9", "32846fb844a6488cbc52f8f28bd48d13", "e0907cf9530f4aa4a96be6d8e9a38db3", "e162796b91a4457296a3bd05ef5d5b0e", "e55c2420e7c0474e91ccec7c8a354ba7", "b60acf14c54c47f9a230c262ff180081", "14cb749092d947c6af7189d916f1516b", "6a7d68c509414dbda67db37067d3f1b7", "7a8651376f7c4722a2de5d9dc05c18e1", "a0d051938dfb4fa09834d5eaabf67243", "3fb1a59e983b4efe9fc8d273675c0025", "de8ef2190e0941b7bc1a54fe1f0720a6", "cf3531d601684c379c4826663749c252", "0d75f9eec41849bf9694f7a8da4d44da", "8b0d69c0a9ba4ea7bf274a2ffc82681d", "e98532d01387474691c78c57470b07ff", "bb9ae3bf44d74f0891a46c95de124726", "c59d04c2f2e54a038b58b2fba28f517d", "e3720df1a92e4cbea334ddcea912437e", "a904502e5cc0490cb26efc69eafd9696", "1fd276c44db64304a9d6fb979e9a4f22", "cd0d24855756414dac99f02374ff64ea", "8b4c9162be654a81a3e3dc0435c7ce12", "7e1675af8a954c53aee7aab1c07f5bcb", "e288e54ddd6141b8bad18fec52c74403", 
"ab1f224dcba14f38ad7451c9f37b5d8e", "e483d723fa124caeb2b61e532ae7c37e", "69f5d1f23b524203a1d42a0fb1127643", "8925b2a18d344d2dbb54580d413854d2", "4060d00ea92b47acacff2c8d35abac2c", "58b59cdcf37343b88641849764e2a6dd", "08ef7de000dc4f87aabac5a0546946a6", "5cbedb80b2e24191a9d6ad9a9df37f66", "dbd69064331a4d0ab6cc15000384d9e1", "1f90132754524912aea327174c89afb0", "a559f0a0323343b4bc01c04db10def94", "86be5afa37eb48ecabdf0bf0715cd977", "9e6997a2c4564cf4bb58d3401d753750", "a7cfc215934a4fc28dadb69dbcbecda0", "b968ee3bc55647cebea1fe3ebb8b585d", "b14666c0e3b54286b0c7804bcafaa251", "214bca7d15474b6bbaf65a8baaf6938e", "464fbe564e4f45d6908240a2187f1566", "829c1b2e672f417ca7a93a91828e2111", "786f3596933b43b89b22aa87d4bddbca", "5d0a681ef05f48a88698ee038590fc2d", "cf8683c8ee2f439bafcb244875077bf4", "4ee7069a94ba4c2e87fea8698fe7ea71", "fafc2e6c02ca4f6e831a16816b7e7953", "802829898a0044fbb9a77e3d2a47e25c", "74d86de447694b55ac0020e9b8589ae5", "ac2d3b385131493b8955c1de79abcfa3", "87c89d76489d442fa94d370842f013c0", "ba50cf4bef8e4e8a90f76f34a4d4f38a", "266e072f40de42c6a77f2e52c179898b", "3e31bbc5bd774801a9a2f8cbf97d0800", "1d7e2f87a33a4915977bbf26a926b216", "5258a93ae9fe45b4b1394ca399dc440e", "f73effe6a5214fba86d7564717fd4048", "fced02554c2f42f3a5a7c6fc55ded35e", "7018d37a928f4c79a15a944f853bc1e2", "76e0f4137121461f8cd7ffb74621d85d", "d27a7b3b93ed4c7b99925258f0fede18", "98a1d8cfd672444ca4e564325bf211e9", "c50e208af43245ada113f69ae19d1eb5", "1e8464e95f28481c83f24940d0dafccc", "0048969ec9274e8d807b2d1c22ce121e", "b221e2b05e094dd6affe15f6be157733", "6c785b987cb84e81a2c9829fed9f4301", "f2b344fdb6e443ebbbed15382a513058", "c232fd2b94aa4b6aa165cc5d26616123", "88b3a84f5e6a48b2a8669d3775c41e32", "0e84a1c72e2f480db8403c63a41eed7b", "48d2fe7ca4384051a0b60af115198584", "9460c610d78d4549b180f0c2e92ff2d9", "1ef8ce7966224ae4a61bd0b517ce5196", "51d6d73eec8241cc986ff63a2590a548", "b689b639587144cc8289ad277463aeb0", "a3fbfaf9e5074c999f951ad978f34ca2", "a5b76b741bfc4a7ea07187cb308699b6", 
"67affed77a6b4f498d27a445605eacb3", "3b95d5ca3be5403bb4e7419e46a79da7", "6adec3610e074e7e8c49536d2be86322", "0c8454e211b84107a307a26cf3d8aa68", "b041683bf0a44266999f28b5dc4182b6", "abe77a36403447ceafa90e396bddfc34", "b3513c48dfc6424db5248ce28f0b3c5f", "92625d7f65f14b52adf31be50de92e2d", "df5b4e81f71e45bc8f36af9f1cf36db2", "4ef05084d0994c5aac1c971a3cac4e2c", "8fc2796f2c77462e85f73de6854d5141", "ee85b6da6cef437387dde23cb4ac3686", "2ff47be5e9a5451080483970781a0e0a", "7541767aad5142799a8af15ea0fdd871", "675f8fd9145e4efb993fd7a41881534c", "8a0101cd62c348ddbc4d9c1c8307fb01", "6032cb7c6361477da2c1149140643106", "cb9e59de35fb41eca36ec17a3af48301", "fb3a48509aa84ec4a50fe8d9801471a2", "09324aba807241059afc720e1e39a545", "255ec9439fbf458798d2eaa5e46304d7", "5a66244d02b44ecd8864d6d97053b484", "27b975e7e7f248408e004800fed5b74a", "feb00fc98cb54dabb37efeca4c803e26", "8418fd9475dc4077a4bb4fd4340c6170", "bce01755179e428380831d7888c34d15", "4f8573b90c854928bc40105c35134d66", "94d7b5a4e6a1417cb05acc38e9371741", "cfc4e796b3634c14a65bd3a46b8351aa", "59024c1e0f604640894561ae5ab8f22e", "709618c826f64d528e9348928bb91d9a", "df366bb40f5b45ca83801ada9b74e925", "987ffef43a314ee6be2cf6c50aad8c5a", "8a0e4a359a4c4b72b5ee5793c876d0d8", "2fcf49f39b79498791e06da5f24d74f4", "42c293bbe4b2458fa75289c7d54356ae", "40b1f8a97a9a4e4695f7a16bcc7e2cfd", "b86a98be9e5b4c72b5aa7de8595660c7", "d55c34579aba4900aa4007d8769daa7d", "2eaf135db31549ebb3de92215596c6cf", "1e678dc17998414e902f31468d962ffc", "09253159493749d5bf00fb4405a3ce1a", "8d05da5d6a9b4ae3bc7e0382580d053c", "bff7e4ce1576410b83053f90b92193e1", "8b6cd04018d54058b79e6f6f67be43ad", "ed6b3658c88b43e4968dbda33505c764", "a5c7a1c7366e4ab28c249a0084e2afd5", "5e235e4072b64be98ad189d975d414f8", "25112854007444db8ab3f3e73e42dee3", "664fcb93f92e4f4ba1033200ec6c4114", "5284f9d9f75549f1a828062b67eb7a67", "c0c0302e98b141f8b91434f16f6f308c", "129d0deaf86846a2a272427cdb66a8a6", "7b746d0ca75e4aab830b0cfb004300be", "6f2c240bab8945feb9458a2a0b75a6d2", 
"a049eafc68aa4c2d8e6fa180900630ac", "db7aca4562834b2d95b65963e082841e", "39c21d6b21c04d1c8806ab15e74e9959", "34dec0a8327340bf9b57b77632ee3148", "0295fcc8cf0e4e788d54c3c50cce8456", "0d7d3ea548a54b64ab05f6e4a9bebdbe", "8b32a5144b5d4e35b8815dd168412614", "f9815bf5da8e45d598ab9e343e0f7e17", "75061bc41b4e490f80daad06e7209931", "52fa77e88d674ff0a659116414b6817c", "b0c96641d3f840aaa3bbae8bde0b3eff", "394f8589e95347e5b031704045337b4f", "7db8754d92ed4b108e67a0451ecf1fd9", "3332bb67ca8c49648b1bbdacc232b32b", "d7b184954d1f4179b38bbe960433054c", "1eeb4b9f16d244a3a88c41b76828323f", "b6b534a967874bd6929998ac14a8ffb8", "85cc10ace1af472487827146e4a24a10", "3571999098194f479ba73cb11b455d02", "86529cc2b7cf42e3be6211be39448079", "27fb61552bc94d4888817a0c33891565", "17384aa7e9004db5b135d9cabb6ba711", "fbd31e8b2044444c9e90c52eb1d4a4da", "bd0b21c0bc2043d191dc99797077c212", "393e99082b524077a5f56dcfd590866f", "96f06d1a184141b587bb6c015a67fda3", "cfba7ebd3d4d4017ae9eb54f78a4ea8c", "abe0d0c93b8544079f9bfddf0748b40f", "43d4d9d667ed47a8accd1bd805b723ba", "50222671fcac4dfc9427d50e799f23a0", "75937502b5b64ea18ed39369d22618ab", "6b6aa926d50d4126969c04d3aff863dd", "5e814dc429a64befb2a84a0eedbb579c", "e79c5c908c3f49cbb65aaafa6eab28bc", "0cb0de92292c4b17a408441ad1891137", "5bbc21ca335b4ce095fa0d4de4a62cad", "db9547b473a448048abb08ebe5593c23", "a629e372eaae4fe5af9810e20fee210b", "9d9637615a204efaa214e955cfc47cdf", "aa9421151e3242a4b890c1b5879dd03e", "fa67af037b6e45f0a7a45a0ccb8eac96", "e64f0e0feefa4133a47ccbac8b1ef90a", "c8ac2ec2326d4874ac7b5d6751d7de89", "fc52f8d478be4abf96a47a8a3586d3eb", "8f9871b0f76d43199b921f31ceb9af25", "ad89ad2531e24b81bd0b8335f2434197", "1bf58fa01ad9455aa79e263fe81be55e", "80eae7c9336246d89f6ffe12f95f58b0", "eac27b22c02343e2b0ca0c913525baf9", "7923af20fdec430d992d9a95934f5088", "d9a871bbf515400c950ba7a5d50032fc", "4b996d8b7d3445e3b4986b2de49f8dd4", "03e28075ad5c47588a16df9e1cbee9f9", "51df9ac6db68494a8839f93d63607859", "3b8c8fc7cc2b4e269d79d30029cbe7e6", 
"9a07e8d23cbf45a098ff2d134be702f0", "8be390a819f64e93bcef979c257d0ce2", "9e2960424ef4475c939d780ea5029c4c", "bd13102e4dcb40619dbffb1ac8b5449b", "20124c03bc0c4030b27ed96e914267f1", "e36a6d5d7a9247119b3d0d6b1d3ab3cf", "f7c6097c17e34ece98f25bdd249b0cab", "5d98f6c1ae9c40c68e0a47e39e9120e9", "d588e476f1e6418a8dea3e17d42f6cfa", "daf94092c3ad4147ae6d4cbe79a225cd", "42f046ca89734f6bb7952131d084ed4d", "356145dbcc3144f6a0df456607226081", "523add936fb9437aa8db16c325c6eb64", "acac242658f24e48895d567a8306ef6b", "8ee4e328421f48879cdca1dd64d2fd0d", "bf39b090b0214c6fb60f592f4ddd250f", "4b30457402944d0b8e831a14e7d09d12", "e6f22b3244df4cb890493dc338a55319", "8b3e3b257b664fe2835f5ac4bb13dead", "b43772f1477a496e901b51447b3f3f9a", "e9ad0e08956f4b9a8d5c2c617eb09112", "48cdc05e2cde406c9fc7bbad22de26a3", "2e165ec2f7f14a78a95ed4f5201bfcc8", "ea4f6cfd57c54df79fd1593389d80d7f", "fec5fa8999b9466bad43470024208fbe", "5a4680c9026745ae9708fd2ec22256f0", "f3b2ce88806a4bd79466e642a38474c8" ] }, "id": "qDCG98D-27kP", "outputId": "cc6e6bdb-2b62-4667-9bbf-6d227dbed3dd" }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "49d98065c30245e3b083afd2beccf55b", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading: 0%| | 0.00/262 [00:00\n", "WER: 0.201776 (20.1776)
\n", "CER: 0.047205 (4.7205)
" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 118, "referenced_widgets": [ "49a7045b02334a4aaab4bab4d0bd2f60", "daca29fab61e42db9bac105b77c51d96", "dd4ea618f7154cb2be0b4817bb3951c6", "a25abd70c89f41fd8fe921320ef2cc00", "e5205e10d0384fd38ed31c44e09f3627", "d0033daec00740f4b467c6cf1e8b0876", "511af2f213d5411a8c8cc7d04c02769e", "0196c7ac52004d0d8350c951ae7d6985", "d15865a64682490197d27f16bcb3a493", "b203afcf13e240edb02c2cecacdbd7e4", "714d117fb8ce45ae8e8030db594d3f8f" ] }, "id": "61tK0W4J25Dz", "outputId": "46ba6112-248c-4321-c374-52f8d2b7ed5f" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Reusing dataset common_voice (/root/.cache/huggingface/datasets/mozilla-foundation___common_voice/ia/8.0.0/b8bc4d453193c06a43269b46cd87f075c70f152ac963b7f28f7a2760c45ec3e8)\n", "Loading cached processed dataset at /root/.cache/huggingface/datasets/mozilla-foundation___common_voice/ia/8.0.0/b8bc4d453193c06a43269b46cd87f075c70f152ac963b7f28f7a2760c45ec3e8/cache-f4febe81b8fd7934.arrow\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "49a7045b02334a4aaab4bab4d0bd2f60", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/427 [00:00