{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "id": "YP3vVkqYUpLx" }, "outputs": [], "source": [ "import os\n", "import shutil" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "AWly9SmkgSwE", "outputId": "8af190ed-5037-4e3b-b91b-b5286d8e0888" }, "outputs": [], "source": [ "# Install the git-lfs and tree system packages.\n", "# Try apt-get directly first: this works when the notebook already runs as root,\n", "# including environments where sudo is not installed. Fall back to sudo otherwise.\n", "# -y answers the confirmation prompt, because a notebook cell has no interactive stdin.\n", "!apt-get install -y git-lfs tree || sudo apt-get install -y git-lfs tree" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "OWGc_zfyq5_T", "outputId": "35ea3459-6f2d-449c-e717-74e7a27c41bf" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[33mWARNING: Ignoring invalid distribution -atasets (/opt/conda/lib/python3.8/site-packages)\u001b[0m\n", "\u001b[33mWARNING: Ignoring invalid distribution -atasets (/opt/conda/lib/python3.8/site-packages)\u001b[0m\n", "Requirement already satisfied: datasets in /workspace/.local/lib/python3.8/site-packages (2.2.2)\n", "Requirement already satisfied: transformers in /opt/conda/lib/python3.8/site-packages (4.17.0.dev0)\n", "Requirement already satisfied: packaging in /opt/conda/lib/python3.8/site-packages (from datasets) (21.3)\n", "Requirement already satisfied: responses<0.19 in /opt/conda/lib/python3.8/site-packages (from datasets) (0.18.0)\n", "Requirement already satisfied: fsspec[http]>=2021.05.0 in /opt/conda/lib/python3.8/site-packages (from datasets) (2022.1.0)\n", "Requirement already satisfied: requests>=2.19.0 in /opt/conda/lib/python3.8/site-packages (from datasets) (2.24.0)\n", "Requirement already satisfied: pyarrow>=6.0.0 in /opt/conda/lib/python3.8/site-packages (from datasets) (6.0.1)\n", "Requirement already satisfied: tqdm>=4.62.1 in /opt/conda/lib/python3.8/site-packages (from datasets) (4.62.3)\n", "Requirement already satisfied: pandas in /opt/conda/lib/python3.8/site-packages (from 
datasets) (1.4.0)\n", "Requirement already satisfied: numpy>=1.17 in /opt/conda/lib/python3.8/site-packages (from datasets) (1.19.2)\n", "Requirement already satisfied: multiprocess in /opt/conda/lib/python3.8/site-packages (from datasets) (0.70.12.2)\n", "Requirement already satisfied: huggingface-hub<1.0.0,>=0.1.0 in /opt/conda/lib/python3.8/site-packages (from datasets) (0.4.0)\n", "Requirement already satisfied: xxhash in /opt/conda/lib/python3.8/site-packages (from datasets) (2.0.2)\n", "Requirement already satisfied: dill<0.3.5 in /opt/conda/lib/python3.8/site-packages (from datasets) (0.3.4)\n", "Requirement already satisfied: aiohttp in /opt/conda/lib/python3.8/site-packages (from datasets) (3.8.1)\n", "Requirement already satisfied: sacremoses in /opt/conda/lib/python3.8/site-packages (from transformers) (0.0.47)\n", "Requirement already satisfied: pyyaml>=5.1 in /opt/conda/lib/python3.8/site-packages (from transformers) (5.4.1)\n", "Requirement already satisfied: filelock in /opt/conda/lib/python3.8/site-packages (from transformers) (3.0.12)\n", "Requirement already satisfied: tokenizers!=0.11.3,>=0.10.1 in /opt/conda/lib/python3.8/site-packages (from transformers) (0.11.4)\n", "Requirement already satisfied: regex!=2019.12.17 in /opt/conda/lib/python3.8/site-packages (from transformers) (2022.1.18)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /opt/conda/lib/python3.8/site-packages (from huggingface-hub<1.0.0,>=0.1.0->datasets) (4.0.1)\n", "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /opt/conda/lib/python3.8/site-packages (from packaging->datasets) (3.0.7)\n", "Requirement already satisfied: chardet<4,>=3.0.2 in /opt/conda/lib/python3.8/site-packages (from requests>=2.19.0->datasets) (3.0.4)\n", "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.8/site-packages (from requests>=2.19.0->datasets) (2020.12.5)\n", "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in 
/opt/conda/lib/python3.8/site-packages (from requests>=2.19.0->datasets) (1.25.11)\n", "Requirement already satisfied: idna<3,>=2.5 in /opt/conda/lib/python3.8/site-packages (from requests>=2.19.0->datasets) (2.10)\n", "Requirement already satisfied: frozenlist>=1.1.1 in /opt/conda/lib/python3.8/site-packages (from aiohttp->datasets) (1.3.0)\n", "Requirement already satisfied: charset-normalizer<3.0,>=2.0 in /opt/conda/lib/python3.8/site-packages (from aiohttp->datasets) (2.0.10)\n", "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /opt/conda/lib/python3.8/site-packages (from aiohttp->datasets) (4.0.2)\n", "Requirement already satisfied: multidict<7.0,>=4.5 in /opt/conda/lib/python3.8/site-packages (from aiohttp->datasets) (6.0.2)\n", "Requirement already satisfied: aiosignal>=1.1.2 in /opt/conda/lib/python3.8/site-packages (from aiohttp->datasets) (1.2.0)\n", "Requirement already satisfied: attrs>=17.3.0 in /opt/conda/lib/python3.8/site-packages (from aiohttp->datasets) (21.4.0)\n", "Requirement already satisfied: yarl<2.0,>=1.0 in /opt/conda/lib/python3.8/site-packages (from aiohttp->datasets) (1.7.2)\n", "Requirement already satisfied: python-dateutil>=2.8.1 in /opt/conda/lib/python3.8/site-packages (from pandas->datasets) (2.8.2)\n", "Requirement already satisfied: pytz>=2020.1 in /opt/conda/lib/python3.8/site-packages (from pandas->datasets) (2021.1)\n", "Requirement already satisfied: click in /opt/conda/lib/python3.8/site-packages (from sacremoses->transformers) (8.0.3)\n", "Requirement already satisfied: six in /opt/conda/lib/python3.8/site-packages (from sacremoses->transformers) (1.15.0)\n", "Requirement already satisfied: joblib in /opt/conda/lib/python3.8/site-packages (from sacremoses->transformers) (1.1.0)\n", "\u001b[33mWARNING: Ignoring invalid distribution -atasets (/opt/conda/lib/python3.8/site-packages)\u001b[0m\n", "\u001b[33mWARNING: Ignoring invalid distribution -atasets (/opt/conda/lib/python3.8/site-packages)\u001b[0m\n", 
"\u001b[33mWARNING: Ignoring invalid distribution -atasets (/opt/conda/lib/python3.8/site-packages)\u001b[0m\n", "\u001b[33mWARNING: Ignoring invalid distribution -atasets (/opt/conda/lib/python3.8/site-packages)\u001b[0m\n", "\u001b[33mWARNING: You are using pip version 21.3.1; however, version 22.1.1 is available.\n", "You should consider upgrading via the '/opt/conda/bin/python -m pip install --upgrade pip' command.\u001b[0m\n" ] } ], "source": [ "# Install the Hugging Face datasets and transformers packages.\n", "# The %pip magic (rather than !pip) installs into the environment of the running kernel,\n", "# not whichever pip happens to be first on the shell PATH.\n", "%pip install datasets transformers" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "TvDJ7CYpzSJQ", "outputId": "f58b6c87-1c32-4aa6-9945-8fe3b1eb4a66" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[33mWARNING: Ignoring invalid distribution -atasets (/opt/conda/lib/python3.8/site-packages)\u001b[0m\n", "\u001b[33mWARNING: Ignoring invalid distribution -atasets (/opt/conda/lib/python3.8/site-packages)\u001b[0m\n", "Collecting https://github.com/kpu/kenlm/archive/master.zip\n", " Downloading https://github.com/kpu/kenlm/archive/master.zip (542 kB)\n", " |████████████████████████████████| 542 kB 3.8 MB/s \n", "\u001b[?25h Preparing metadata (setup.py) ... 
\u001b[?25ldone\n", "\u001b[?25hRequirement already satisfied: pyctcdecode in /opt/conda/lib/python3.8/site-packages (0.3.0)\n", "Requirement already satisfied: hypothesis<7,>=6.14 in /opt/conda/lib/python3.8/site-packages (from pyctcdecode) (6.46.9)\n", "Requirement already satisfied: numpy<2.0.0,>=1.15.0 in /opt/conda/lib/python3.8/site-packages (from pyctcdecode) (1.19.2)\n", "Requirement already satisfied: pygtrie<3.0,>=2.1 in /opt/conda/lib/python3.8/site-packages (from pyctcdecode) (2.4.2)\n", "Requirement already satisfied: attrs>=19.2.0 in /opt/conda/lib/python3.8/site-packages (from hypothesis<7,>=6.14->pyctcdecode) (21.4.0)\n", "Requirement already satisfied: sortedcontainers<3.0.0,>=2.1.0 in /opt/conda/lib/python3.8/site-packages (from hypothesis<7,>=6.14->pyctcdecode) (2.4.0)\n", "Building wheels for collected packages: kenlm\n", " Building wheel for kenlm (setup.py) ... \u001b[?25ldone\n", "\u001b[?25h Created wheel for kenlm: filename=kenlm-0.0.0-cp38-cp38-linux_x86_64.whl size=2341844 sha256=7389c3819998781002180209fa8ff1711b65630ca5dc282cff4b128a9db2c0bd\n", " Stored in directory: /tmp/pip-ephem-wheel-cache-yk63c6mt/wheels/ff/08/4e/a3ddc0e786e0f3c1fcd2e7a82c4324c02fc3ae2638471406d2\n", "Successfully built kenlm\n", "\u001b[33mWARNING: Ignoring invalid distribution -atasets (/opt/conda/lib/python3.8/site-packages)\u001b[0m\n", "Installing collected packages: kenlm\n", "\u001b[33mWARNING: Ignoring invalid distribution -atasets (/opt/conda/lib/python3.8/site-packages)\u001b[0m\n", "Successfully installed kenlm-0.0.0\n", "\u001b[33mWARNING: Ignoring invalid distribution -atasets (/opt/conda/lib/python3.8/site-packages)\u001b[0m\n", "\u001b[33mWARNING: Ignoring invalid distribution -atasets (/opt/conda/lib/python3.8/site-packages)\u001b[0m\n", "\u001b[33mWARNING: Ignoring invalid distribution -atasets (/opt/conda/lib/python3.8/site-packages)\u001b[0m\n", "\u001b[33mWARNING: You are using pip version 21.3.1; however, version 22.1.1 is available.\n", "You 
should consider upgrading via the '/opt/conda/bin/python -m pip install --upgrade pip' command.\u001b[0m\n" ] } ], "source": [ "# Install kenlm from the upstream GitHub master archive (unpinned: it tracks whatever\n", "# master currently is) together with pyctcdecode.\n", "# The %pip magic (rather than !pip) installs into the environment of the running kernel.\n", "%pip install https://github.com/kpu/kenlm/archive/master.zip pyctcdecode" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 415, "referenced_widgets": [ "edc436f12376423798af31da019eb50b", "0705b2e7a85c4b4aaebc6cc3494af44b", "8a978848e55a481a94a96b36c30a5076", "acafad3a99b0403486ceac05b768bbbc", "5e07b1951a8d4478b3e38f1e81020d92", "858ba1c848f24b8491d022b277914e84", "3d8e921c0e854bdebb17108b4cabc9f6", "4743028b12014476a634153931a26702", "ef61af4612034388ac9a97125375c2b2", "a45572c8a5714f358bdf6733e7754be3", "0ad2427a4ecc4636a10f295d9178d5c2", "7740c0f39b704684bf0c51a0f2f437af", "48ad04d04f6e4adaa33d635b577a8018", "fd3e054fad4047daae45266d480bcf6a", "ab27dee9582a4f3d9b4010a31558d5fb", "a2a21e4c9deb4a34bffd835a7cb3495b", "244473d30bdb4aa7a08a22e752763da6" ] }, "id": "JHTeonOGXiGq", "outputId": "e4a93331-f896-4d7f-db9a-6f537ee9ad34" }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "c3eef7b5d70d46feaa5d3d7f1281eb82", "version_major": 2, "version_minor": 0 }, "text/plain": [ "VBox(children=(HTML(value='