{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "ThSJZZIwpDSe" }, "source": [ "# Bias and limitations" ] }, { "cell_type": "markdown", "metadata": { "id": "iZ73Z8prpDSl" }, "source": [ "Install the Transformers, Datasets, and Evaluate libraries to run this notebook." ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "id": "yxE96cbtpDSo", "outputId": "0e022765-90e3-4002-c866-f74d0d8d37a9", "colab": { "base_uri": "https://localhost:8080/" } }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Collecting datasets\n", " Downloading datasets-2.14.4-py3-none-any.whl (519 kB)\n", "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/519.3 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━\u001b[0m\u001b[90m╺\u001b[0m\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m174.1/519.3 kB\u001b[0m \u001b[31m5.0 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m519.3/519.3 kB\u001b[0m \u001b[31m7.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting evaluate\n", " Downloading evaluate-0.4.0-py3-none-any.whl (81 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m81.4/81.4 kB\u001b[0m \u001b[31m9.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting transformers[sentencepiece]\n", " Downloading transformers-4.32.0-py3-none-any.whl (7.5 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.5/7.5 MB\u001b[0m \u001b[31m23.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from datasets) (1.23.5)\n", "Requirement already satisfied: pyarrow>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (9.0.0)\n", "Collecting dill<0.3.8,>=0.3.0 (from datasets)\n", " Downloading dill-0.3.7-py3-none-any.whl (115 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m12.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets) (1.5.3)\n", "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (2.31.0)\n", "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (4.66.1)\n", "Collecting xxhash (from datasets)\n", " Downloading xxhash-3.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m18.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting multiprocess (from datasets)\n", " Downloading multiprocess-0.70.15-py310-none-any.whl (134 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m14.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: fsspec[http]>=2021.11.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (2023.6.0)\n", "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets) (3.8.5)\n", "Collecting huggingface-hub<1.0.0,>=0.14.0 (from datasets)\n", " Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m268.8/268.8 kB\u001b[0m \u001b[31m22.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from datasets) (23.1)\n", "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (6.0.1)\n", "Collecting responses<0.19 (from evaluate)\n", " Downloading responses-0.18.0-py3-none-any.whl (38 kB)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece]) (3.12.2)\n", "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece]) (2023.6.3)\n", "Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers[sentencepiece])\n", " Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m49.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting safetensors>=0.3.1 (from transformers[sentencepiece])\n", " Downloading safetensors-0.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m55.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting sentencepiece!=0.1.92,>=0.1.91 (from transformers[sentencepiece])\n", " Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m58.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: protobuf in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece]) (3.20.3)\n", "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (23.1.0)\n", "Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (3.2.0)\n", "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (6.0.4)\n", "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (4.0.3)\n", "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.9.2)\n", "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.4.0)\n", "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.3.1)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0.0,>=0.14.0->datasets) (4.7.1)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (3.4)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (2.0.4)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (2023.7.22)\n", "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2.8.2)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2023.3)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas->datasets) (1.16.0)\n", "Installing collected packages: tokenizers, sentencepiece, safetensors, xxhash, dill, responses, multiprocess, huggingface-hub, transformers, datasets, evaluate\n", "Successfully installed datasets-2.14.4 dill-0.3.7 evaluate-0.4.0 huggingface-hub-0.16.4 multiprocess-0.70.15 responses-0.18.0 safetensors-0.3.3 sentencepiece-0.1.99 tokenizers-0.13.3 transformers-4.32.0 xxhash-3.3.0\n" ] } ], "source": [ "!pip install datasets evaluate transformers[sentencepiece]" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "id": "k8T-_2OupDSt", "outputId": "8d7d4519-dd9f-47ba-a72b-f27f491d6d9b", "colab": { "base_uri": "https://localhost:8080/", "height": 286, "referenced_widgets": [ "b47a91a07ed94b3889980007b3ec8c57", "7b0f73da8ef346488b75d024b37c2a58", "b926cd5cfd524bcf8cb12f53b3c24e59", "00973fa305764b71a0f5fe647ee9efeb", "b0216f05f72847adb4e9129838d1ae26", "bb1a570a6568425cb3675874d7083ad0", "3cb0d07213074f56ae93cc13efee531b", "949ce5b046d640959350db5e53aae3cc", "5dd5a1e8d6604561b52be69d53faf89b", "9a1674e708b54dc98b6a76846e3e74c3", "362c957808c7412f87a6339c4aae6f5e", "6be21565492b455c936378b7e3a019e4", "2185097328ae473c89d989cb5a0ed409", "9fda58d130ce49a38ddb2b48fa639f51", "33986a3a844e45158f3ebe50d4099741", "e721a09a918d4efb910b08d13cd8230e", "72d5629a2cec49a3933fc575148c85e9", "956a698e96654c578c55585e2dc1d872", "ce3e99b04a324278a150d25780af0100", "b4877cfd84d24fb29dbcefe2c13f3abd", "8276337cc8764b77a9a3006b2db2d7d5", "87b860c01b354682a8df5ed22ee2735c", "95a0686b03a4436586b2595051267b47", "aa8c205a3ca64279869c0a471c42d409", "5d0b663db1d34e789d59917a556c4f29", "f3c9ff05d6c94fba866d156ba7209f52", "4ee0cbd1e820418e94078cfe065586fe", "17a642e0267d4bad83d9a282fa87b635", "36a71bceb2054daf916a8232283d8b31", "c6a9a51c62cd47d0abc3acd77920ac1c", "98cfb4283fb1457c8cc9e1821cbb5e68", "d27e65e1923347c382d067b581846722", "a87cc397aacc4e8da05c9de500f5a8be", "885d024f4ed14d67bb546e8a8d568340", "cc3fca9573f64bb7abd00837d32f0e1c", "f49fc5e726904212b1db1a1fd553fb1f", "0ffd08e5d90543cd84c06ea9354000bb", "2ba332ef31204469b952531712b1effe", "6129d3eb6a874088b3406ba37ae33059", "4f3d6352d87e4d668f944e26f05ef1c1", "66b95f212f2e4bcb9952b22833e82068", "add6189643784817b9404d42a6c4b4a3", "8311558f10b745c991485054deae98a9", "5e2d8a5ff10744feb296743d4cb310b1", "66c9b0ffd5cb4a77b799bc60e7643077", "55f0eb13c89943acb6ef5e4ea00448c9", "329bfb3686b34c8bbb6ae99a65ad49a4", "6fd2c308e8884dee99bdca115de19399", "de2832d0b18848a69baea8b6a360f0f3", "47062bf60aea49a3a2cec6881a7e01a8", "3669728ad6924f62965d437f38efdbd5", "9ab8cd76e8a74d33ba40efb3c371fd1c", "b695b063ebc646d9af2fdb7714652ec9", "58ecbb9b5a8e4c55aead5b24d27afb86", "e508217023c149fcb065be2bec817a1c" ] } }, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "Downloading (…)lve/main/config.json: 0%| | 0.00/570 [00:00