{ "cells": [ { "cell_type": "code", "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 0 }, "id": "vK6B3ubS21Ky", "outputId": "7bf79e00-aba4-4702-a2ab-6c6d137af4e3" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: datasets==1.18.3 in /usr/local/lib/python3.7/dist-packages (1.18.3)\n", "Requirement already satisfied: aiohttp in /usr/local/lib/python3.7/dist-packages (from datasets==1.18.3) (3.8.1)\n", "Requirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from datasets==1.18.3) (21.3)\n", "Requirement already satisfied: xxhash in /usr/local/lib/python3.7/dist-packages (from datasets==1.18.3) (2.0.2)\n", "Requirement already satisfied: huggingface-hub<1.0.0,>=0.1.0 in /usr/local/lib/python3.7/dist-packages (from datasets==1.18.3) (0.4.0)\n", "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from datasets==1.18.3) (1.19.5)\n", "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.7/dist-packages (from datasets==1.18.3) (4.62.3)\n", "Requirement already satisfied: dill in /usr/local/lib/python3.7/dist-packages (from datasets==1.18.3) (0.3.4)\n", "Requirement already satisfied: multiprocess in /usr/local/lib/python3.7/dist-packages (from datasets==1.18.3) (0.70.12.2)\n", "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.7/dist-packages (from datasets==1.18.3) (2.23.0)\n", "Requirement already satisfied: fsspec[http]>=2021.05.0 in /usr/local/lib/python3.7/dist-packages (from datasets==1.18.3) (2022.1.0)\n", "Requirement already satisfied: pyarrow!=4.0.0,>=3.0.0 in /usr/local/lib/python3.7/dist-packages (from datasets==1.18.3) (6.0.1)\n", "Requirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from datasets==1.18.3) (1.3.5)\n", "Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from datasets==1.18.3) 
(4.10.1)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0.0,>=0.1.0->datasets==1.18.3) (3.10.0.2)\n", "Requirement already satisfied: pyyaml in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0.0,>=0.1.0->datasets==1.18.3) (6.0)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0.0,>=0.1.0->datasets==1.18.3) (3.4.2)\n", "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging->datasets==1.18.3) (3.0.7)\n", "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets==1.18.3) (3.0.4)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets==1.18.3) (2021.10.8)\n", "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets==1.18.3) (2.10)\n", "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets==1.18.3) (1.24.3)\n", "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets==1.18.3) (1.2.0)\n", "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets==1.18.3) (4.0.2)\n", "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets==1.18.3) (21.4.0)\n", "Requirement already satisfied: asynctest==0.13.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets==1.18.3) (0.13.0)\n", "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets==1.18.3) (1.7.2)\n", "Requirement already satisfied: charset-normalizer<3.0,>=2.0 in 
/usr/local/lib/python3.7/dist-packages (from aiohttp->datasets==1.18.3) (2.0.11)\n", "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets==1.18.3) (1.3.0)\n", "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.7/dist-packages (from aiohttp->datasets==1.18.3) (6.0.2)\n", "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->datasets==1.18.3) (3.7.0)\n", "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/dist-packages (from pandas->datasets==1.18.3) (2018.9)\n", "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas->datasets==1.18.3) (2.8.2)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas->datasets==1.18.3) (1.15.0)\n", "Requirement already satisfied: transformers in /usr/local/lib/python3.7/dist-packages (4.16.2)\n", "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (2019.12.20)\n", "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.7/dist-packages (from transformers) (6.0)\n", "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.7/dist-packages (from transformers) (21.3)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from transformers) (2.23.0)\n", "Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from transformers) (4.10.1)\n", "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.7/dist-packages (from transformers) (4.62.3)\n", "Requirement already satisfied: tokenizers!=0.11.3,>=0.10.1 in /usr/local/lib/python3.7/dist-packages (from transformers) (0.11.4)\n", "Requirement already satisfied: sacremoses in /usr/local/lib/python3.7/dist-packages (from transformers) 
(0.0.47)\n", "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (1.19.5)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers) (3.4.2)\n", "Requirement already satisfied: huggingface-hub<1.0,>=0.1.0 in /usr/local/lib/python3.7/dist-packages (from transformers) (0.4.0)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<1.0,>=0.1.0->transformers) (3.10.0.2)\n", "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging>=20.0->transformers) (3.0.7)\n", "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->transformers) (3.7.0)\n", "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (3.0.4)\n", "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2.10)\n", "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (1.24.3)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2021.10.8)\n", "Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.15.0)\n", "Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (7.1.2)\n", "Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.1.0)\n", "Requirement already satisfied: pyctcdecode in /usr/local/lib/python3.7/dist-packages (0.3.0)\n", "Requirement already satisfied: numpy<2.0.0,>=1.15.0 in /usr/local/lib/python3.7/dist-packages (from pyctcdecode) (1.19.5)\n", "Requirement 
already satisfied: pygtrie<3.0,>=2.1 in /usr/local/lib/python3.7/dist-packages (from pyctcdecode) (2.4.2)\n", "Requirement already satisfied: hypothesis<7,>=6.14 in /usr/local/lib/python3.7/dist-packages (from pyctcdecode) (6.36.1)\n", "Requirement already satisfied: attrs>=19.2.0 in /usr/local/lib/python3.7/dist-packages (from hypothesis<7,>=6.14->pyctcdecode) (21.4.0)\n", "Requirement already satisfied: sortedcontainers<3.0.0,>=2.1.0 in /usr/local/lib/python3.7/dist-packages (from hypothesis<7,>=6.14->pyctcdecode) (2.4.0)\n", "Requirement already satisfied: jiwer in /usr/local/lib/python3.7/dist-packages (2.3.0)\n", "Requirement already satisfied: python-Levenshtein==0.12.2 in /usr/local/lib/python3.7/dist-packages (from jiwer) (0.12.2)\n", "Requirement already satisfied: setuptools in /usr/local/lib/python3.7/dist-packages (from python-Levenshtein==0.12.2->jiwer) (57.4.0)\n", "Collecting https://github.com/kpu/kenlm/archive/master.zip\n", " Using cached https://github.com/kpu/kenlm/archive/master.zip\n", "\n", " _| _| _| _| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _|_|_|_| _|_| _|_|_| _|_|_|_|\n", " _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _|\n", " _|_|_|_| _| _| _| _|_| _| _|_| _| _| _| _| _| _|_| _|_|_| _|_|_|_| _| _|_|_|\n", " _| _| _| _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _|\n", " _| _| _|_| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _| _| _| _|_|_| _|_|_|_|\n", "\n", " To login, `huggingface_hub` now requires a token generated from https://huggingface.co/settings/token.\n", " (Deprecated, will be removed in v0.3.0) To login with username and password instead, interrupt with Ctrl+C.\n", " \n", "Token: \n", "Login successful\n", "Your token has been saved to /root/.huggingface/token\n", "\u001b[1m\u001b[31mAuthenticated through git-credential store but this isn't the helper defined on your machine.\n", "You might have to re-authenticate when pushing to the Hugging Face Hub. 
Run the following command in your terminal in case you want to set this credential helper as the default\n", "\n", "git config --global credential.helper store\u001b[0m\n" ] } ], "source": [ "# Dependencies are pinned to the versions recorded in this cell's last run so a fresh kernel reproduces the same environment.\n", "%pip install datasets==1.18.3\n", "%pip install transformers==4.16.2\n", "%pip install pyctcdecode==0.3.0\n", "%pip install jiwer==2.3.0\n", "# kenlm is installed from the master-branch source archive (no pinned release is used here).\n", "%pip install https://github.com/kpu/kenlm/archive/master.zip\n", "# Prompts for a Hugging Face access token (generated at https://huggingface.co/settings/token).\n", "!huggingface-cli login" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 48, "referenced_widgets": [ "1671593a74f54e6aaa2f63ae16347b28", "634693e852884de2980acbc9d73fe995", "8671b1ab18af4388853dfba14ed18758", "28d015fe0f5a481f995374f38ecac3d1", "1d1dc74f886a489585fec0372b122214", "586e81066c5d47f2959a63cd89e24f5e", "7bc885e048c746cfbdd918f1102a72d5", "f8f8844e0d5945cd8daf2955e0b9ccaf", "81587ba20e8a47a38a83ae442acfc65e", "a8a4239d08564b19872f14440800b910", "a7d6c85610594e7ab6c121342c42e189" ] }, "id": "wUoMGUOH24Sv", "outputId": "af99b7cc-4dda-4054-87d9-d8a8c26c6e75" }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "1671593a74f54e6aaa2f63ae16347b28", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading: 0%| | 0.00/2.16k [00:00