{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "V6LpJnLj_kVX" }, "source": [ "# Transformers, what can they do?" ] }, { "cell_type": "markdown", "metadata": { "id": "RmaCJy-c_kVd" }, "source": [ "Install the Transformers, Datasets, and Evaluate libraries to run this notebook." ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "id": "En9ZDb0l_kVg", "outputId": "b9520091-580e-48db-f7b6-da20db63fed4", "colab": { "base_uri": "https://localhost:8080/" } }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Collecting datasets\n", " Downloading datasets-2.14.4-py3-none-any.whl (519 kB)\n", "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/519.3 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[90m╺\u001b[0m \u001b[32m512.0/519.3 kB\u001b[0m \u001b[31m15.8 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m519.3/519.3 kB\u001b[0m \u001b[31m12.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting evaluate\n", " Downloading evaluate-0.4.0-py3-none-any.whl (81 kB)\n", "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/81.4 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m81.4/81.4 kB\u001b[0m \u001b[31m11.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting transformers[sentencepiece]\n", " Downloading transformers-4.31.0-py3-none-any.whl (7.4 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.4/7.4 MB\u001b[0m \u001b[31m99.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from datasets) (1.23.5)\n", "Requirement already 
satisfied: pyarrow>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (9.0.0)\n", "Collecting dill<0.3.8,>=0.3.0 (from datasets)\n", " Downloading dill-0.3.7-py3-none-any.whl (115 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m13.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets) (1.5.3)\n", "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (2.31.0)\n", "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (4.66.0)\n", "Collecting xxhash (from datasets)\n", " Downloading xxhash-3.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m19.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting multiprocess (from datasets)\n", " Downloading multiprocess-0.70.15-py310-none-any.whl (134 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m17.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: fsspec[http]>=2021.11.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (2023.6.0)\n", "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets) (3.8.5)\n", "Collecting huggingface-hub<1.0.0,>=0.14.0 (from datasets)\n", " Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m268.8/268.8 kB\u001b[0m \u001b[31m26.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from datasets) (23.1)\n", 
"Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (6.0.1)\n", "Collecting responses<0.19 (from evaluate)\n", " Downloading responses-0.18.0-py3-none-any.whl (38 kB)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece]) (3.12.2)\n", "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece]) (2023.6.3)\n", "Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers[sentencepiece])\n", " Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m120.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting safetensors>=0.3.1 (from transformers[sentencepiece])\n", " Downloading safetensors-0.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m78.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting sentencepiece!=0.1.92,>=0.1.91 (from transformers[sentencepiece])\n", " Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m90.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: protobuf in /usr/local/lib/python3.10/dist-packages (from transformers[sentencepiece]) (3.20.3)\n", "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (23.1.0)\n", "Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (3.2.0)\n", "Requirement already 
satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (6.0.4)\n", "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (4.0.2)\n", "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.9.2)\n", "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.4.0)\n", "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.3.1)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0.0,>=0.14.0->datasets) (4.7.1)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (3.4)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (2.0.4)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets) (2023.7.22)\n", "Requirement already satisfied: python-dateutil>=2.8.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2.8.2)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2023.3)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.1->pandas->datasets) (1.16.0)\n", "Installing collected packages: tokenizers, sentencepiece, safetensors, xxhash, dill, responses, multiprocess, huggingface-hub, transformers, datasets, evaluate\n", "Successfully installed datasets-2.14.4 dill-0.3.7 evaluate-0.4.0 huggingface-hub-0.16.4 multiprocess-0.70.15 responses-0.18.0 safetensors-0.3.2 sentencepiece-0.1.99 tokenizers-0.13.3 transformers-4.31.0 
xxhash-3.3.0\n" ] } ], "source": [ "%pip install datasets evaluate \"transformers[sentencepiece]\"" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "id": "fEgd_Apr_kVj", "outputId": "9bcd59e0-3b94-470f-abdf-46140766cad4", "colab": { "base_uri": "https://localhost:8080/", "height": 272, "referenced_widgets": [ "2b227efcb1ef4fe582e260e65724a5b1", "eb737d74c23647c5b274ebbb3bcb732b", "51b0d83b31e84100aaf2b15da3e8fdb6", "a4ef9e1af2ee43fbab44f8e60c9eb5c7", "28dbb8c817474b1aaf6ef2f3d0359a8c", "e6ec4d00af314d7086c9def19bcc468b", "461cc5b087e94f14bca802c37efc59a2", "a374bdfffd2640ebbded683f3250ca2c", "f48ef84cfe0e499195e58c1dcddcbc02", "2f1381b5148c429f8d53172c57c85afe", "f834d4d65af14756ae566c9a521268e9", "a88cbc92b4674c35b5fb6182390a9b6b", "16d47dce732944dbb1159a8b787602a7", "1d564f837ad24e789297954ecd0c78ff", "62a6312e00a54939bf695a3f7d904c4e", "9d9721bd71914a348efc359c35914cc5", "2bb1a322c7004e2ba8ed51607b3435bc", "daf56d2079834e30b6de74589bad9860", "cc4905c11ad645a4857acd6423eb70a7", "c5703d6bdfb24358a90ddd17d13f1588", "5b3ef27c847c4b619a34639e59aab498", "df9f31a0286b440eae2b4befe06cfc3e", "f6b7f359b9b641db932de4741a9ae626", "632a7460a75b4a6ebc9d337a023e154e", "71cad0099de445c9aeed12eb76483fb7", "aec190c4da0d4a53b2015a890971ca34", "2cbefcc780ca41c1b5d519bc0c6313b3", "d34c36f3f30946e488cdd630902bee7f", "d98c5a77f7b047b3a5a95eabf0afe693", "cedc1c5d98b94259b873ebe5b39cb8d7", "e186eb5e9d794172b1b2972a50875aa2", "16b64d8eeb124059b87a6146400ffd20", "f1078c9d02fb4f759ade8501fc8d69ce", "f239e24758a14791b82ea1c8c32c2b29", "54c6de94cbba4a079e472acd364322c4", "7a19d9982458481f90c89ba5ec94a99e", "588f3f5b4a3e4bd983793223491f277b", "7856fac2d8b54b8695f56c3d1c68fbe2", "773d723205ad4e819ad17deb28ec57f6", "52b1c3c85e4748c98b5acdacc7c6a0ec", "575fb72fef3446ad8a1936d5670e351c", "e069a3773f21406b86144522c37f5f13", "5eee1190ba034536ae2ff418fc213e5e", "bc9feef128e442edb03a20ad08b85832" ] } }, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "No model was 
supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).\n", "Using a pipeline without specifying a model name and revision in production is not recommended.\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "Downloading (…)lve/main/config.json: 0%| | 0.00/629 [00:00 models.\", top_k=2)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "id": "TKZduMK8_kVq", "outputId": "2ec1bbae-ce56-4ce6-b9c6-36304573b41a", "colab": { "base_uri": "https://localhost:8080/", "height": 558, "referenced_widgets": [ "0e13fe74f9e145a58e26fbe5db511046", "861ca5a4df6f4ad2901ca4cd466f4e6b", "ec7226eed3c1411186491812ce0b31e8", "fdf5dff382a2499aa550f0858953233f", "b34ea4519c444cbe992a14d8fe15465d", "6ce23e6367da42f3b1f1625627f4dfa1", "b8b228c2617b481c9b0231040c32155b", "1cac3ff6226e4d3aaeab4f80725c7ee4", "ed101bc5883049c9b2531d874091c861", "e4d0a28ad77e4c4991df3eb83b970604", "a537e6e1abb74f90971fcb91b28272ea", "61e02d8fed43474a81af3eff4fd1ceb2", "18a562e037b2499c86e14156392de9b2", "8ed82efa88364f3fa672f4d03279c9a2", "b6dbb421e922472ea63d007b3f98ca93", "b4a2da675e3f47349a40b8329e5ea111", "625673920b9944dca569185297f1342d", "19703f559dc7476bb8cc274260a8caea", "7544a96dfa3b4a76974a79084ec69644", "6c96c776a93a4a038d93072719952cbf", "5d9afea7b36e4b83a9a5e0fa5ac377c2", "1a92b289dd3740bb917d4c4ecad29057", "3c123e80d8574c51ab66f9910471a524", "40a15852f77d4ddd943b57d692c5e1db", "225bb48452f84dcf89aef63a658971fa", "d316599edb0b4790a662833e7e778cce", "a531e3db006641d3aa4475445c6c3fd1", "7e818f07722e4740a3c967e342d6eca2", "ca819e075f4f497785c88102789d0c4a", "e5392100cacc402cbaa624e24bb799a9", "4820092f5e4949bb8976e606d6ae4802", "d220054d463b44d0a3c302e2fd19301d", "00960c89d5c942e2bd6446381e8b7893", "d5df9b8accfd4f85bd4b601189d9a569", "c36d9e5c66ac4788a2a6461b9c72cc03", "448743db0e7549858633b4560b67da86", "eedd65ba8179445485352bdcdbded054", 
"5530315ff0e74899a686386bf9e77a63", "60fd4d20b532406b8c0346521a3de326", "e2cbb9a09be540918c5640e268e50461", "5783657d28d840c5921a43a8607fb700", "45ea1cb853e54604b5f23c5683c7287d", "0bf4133860a94789b0c260ce35d12004", "657dc90fe9b047cda3e8ba65c71f5816" ] } }, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "No model was supplied, defaulted to dbmdz/bert-large-cased-finetuned-conll03-english and revision f2482bf (https://huggingface.co/dbmdz/bert-large-cased-finetuned-conll03-english).\n", "Using a pipeline without specifying a model name and revision in production is not recommended.\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "Downloading (…)lve/main/config.json: 0%| | 0.00/998 [00:00