{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "iIaeyOnOLOgX" }, "source": [ "This notebook regroups the code sample of the video below, which is a part of the [Hugging Face course](https://huggingface.co/course)." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "cellView": "form", "id": "swZrDZczLOga", "outputId": "3d089d55-0e68-48f0-995d-cc61289a073b" }, "outputs": [ { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#@title\n", "from IPython.display import HTML\n", "\n", "HTML('')" ] }, { "cell_type": "markdown", "metadata": { "id": "96D08SOHLOgc" }, "source": [ "Install the Transformers and Datasets libraries to run this notebook." ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "id": "Kag_-VosLOgc", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "993d0981-9166-42f2-b62e-e295279274cc" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", "Collecting datasets\n", " Downloading datasets-2.8.0-py3-none-any.whl (452 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m452.9/452.9 KB\u001b[0m \u001b[31m5.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting transformers[sentencepiece]\n", " Downloading transformers-4.25.1-py3-none-any.whl (5.8 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.8/5.8 MB\u001b[0m \u001b[31m56.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.8/dist-packages (from datasets) (1.21.6)\n", "Collecting responses<0.19\n", " Downloading responses-0.18.0-py3-none-any.whl (38 kB)\n", "Requirement already satisfied: fsspec[http]>=2021.11.1 in /usr/local/lib/python3.8/dist-packages (from 
datasets) (2022.11.0)\n", "Requirement already satisfied: dill<0.3.7 in /usr/local/lib/python3.8/dist-packages (from datasets) (0.3.6)\n", "Collecting huggingface-hub<1.0.0,>=0.2.0\n", " Downloading huggingface_hub-0.11.1-py3-none-any.whl (182 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m182.4/182.4 KB\u001b[0m \u001b[31m7.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.8/dist-packages (from datasets) (2.25.1)\n", "Requirement already satisfied: pyarrow>=6.0.0 in /usr/local/lib/python3.8/dist-packages (from datasets) (9.0.0)\n", "Requirement already satisfied: pandas in /usr/local/lib/python3.8/dist-packages (from datasets) (1.3.5)\n", "Requirement already satisfied: packaging in /usr/local/lib/python3.8/dist-packages (from datasets) (21.3)\n", "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.8/dist-packages (from datasets) (4.64.1)\n", "Collecting xxhash\n", " Downloading xxhash-3.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (213 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m213.0/213.0 KB\u001b[0m \u001b[31m8.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: aiohttp in /usr/local/lib/python3.8/dist-packages (from datasets) (3.8.3)\n", "Collecting multiprocess\n", " Downloading multiprocess-0.70.14-py38-none-any.whl (132 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m132.0/132.0 KB\u001b[0m \u001b[31m2.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.8/dist-packages (from datasets) (6.0)\n", "Collecting tokenizers!=0.11.3,<0.14,>=0.11.1\n", " Downloading tokenizers-0.13.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)\n", "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.6/7.6 MB\u001b[0m \u001b[31m51.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.8/dist-packages (from transformers[sentencepiece]) (2022.6.2)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.8/dist-packages (from transformers[sentencepiece]) (3.9.0)\n", "Requirement already satisfied: protobuf<=3.20.2 in /usr/local/lib/python3.8/dist-packages (from transformers[sentencepiece]) (3.19.6)\n", "Collecting sentencepiece!=0.1.92,>=0.1.91\n", " Downloading sentencepiece-0.1.97-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m29.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (22.2.0)\n", "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (4.0.2)\n", "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (1.3.3)\n", "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (6.0.4)\n", "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (1.3.1)\n", "Requirement already satisfied: charset-normalizer<3.0,>=2.0 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (2.1.1)\n", "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.8/dist-packages (from aiohttp->datasets) (1.8.2)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.8/dist-packages (from huggingface-hub<1.0.0,>=0.2.0->datasets) (4.4.0)\n", "Requirement 
already satisfied: pyparsing!=3.0.5,>=2.0.2 in /usr/local/lib/python3.8/dist-packages (from packaging->datasets) (3.0.9)\n", "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.8/dist-packages (from requests>=2.19.0->datasets) (1.24.3)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.8/dist-packages (from requests>=2.19.0->datasets) (2022.12.7)\n", "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.8/dist-packages (from requests>=2.19.0->datasets) (2.10)\n", "Requirement already satisfied: chardet<5,>=3.0.2 in /usr/local/lib/python3.8/dist-packages (from requests>=2.19.0->datasets) (4.0.0)\n", "Collecting urllib3<1.27,>=1.21.1\n", " Downloading urllib3-1.26.14-py2.py3-none-any.whl (140 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m140.6/140.6 KB\u001b[0m \u001b[31m7.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.8/dist-packages (from pandas->datasets) (2.8.2)\n", "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.8/dist-packages (from pandas->datasets) (2022.7)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.8/dist-packages (from python-dateutil>=2.7.3->pandas->datasets) (1.15.0)\n", "Installing collected packages: tokenizers, sentencepiece, xxhash, urllib3, multiprocess, responses, huggingface-hub, transformers, datasets\n", " Attempting uninstall: urllib3\n", " Found existing installation: urllib3 1.24.3\n", " Uninstalling urllib3-1.24.3:\n", " Successfully uninstalled urllib3-1.24.3\n", "Successfully installed datasets-2.8.0 huggingface-hub-0.11.1 multiprocess-0.70.14 responses-0.18.0 sentencepiece-0.1.97 tokenizers-0.13.2 transformers-4.25.1 urllib3-1.26.14 xxhash-3.2.0\n" ] } ], "source": [ "! 
pip install datasets transformers[sentencepiece]" ] }, { "cell_type": "markdown", "metadata": { "id": "zzVH8120LOgc" }, "source": [ "You will need an authentication token with your Hugging Face credentials to use the `push_to_hub` method. Run `huggingface-cli login` in your terminal, or execute the following cell:" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "id": "EHRAqWajLOgc", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "3dc32e13-2941-470a-a41b-8825f5abaffd" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\n", " _| _| _| _| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _|_|_|_| _|_| _|_|_| _|_|_|_|\n", " _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _|\n", " _|_|_|_| _| _| _| _|_| _| _|_| _| _| _| _| _| _|_| _|_|_| _|_|_|_| _| _|_|_|\n", " _| _| _| _| _| _| _| _| _| _| _|_| _| _| _| _| _| _| _|\n", " _| _| _|_| _|_|_| _|_|_| _|_|_| _| _| _|_|_| _| _| _| _|_|_| _|_|_|_|\n", "\n", " To login, `huggingface_hub` now requires a token generated from https://huggingface.co/settings/tokens .\n", " \n", "Token: \n", "Add token as git credential? 
(Y/n) y\n", "Token is valid.\n", "\u001b[1m\u001b[31mCannot authenticate through git-credential as no helper is defined on your machine.\n", "You might have to re-authenticate when pushing to the Hugging Face Hub.\n", "Run the following command in your terminal in case you want to set the 'store' credential helper as default.\n", "\n", "git config --global credential.helper store\n", "\n", "Read https://git-scm.com/book/en/v2/Git-Tools-Credential-Storage for more details.\u001b[0m\n", "Token has not been saved to git credential helper.\n", "Your token has been saved to /root/.huggingface/token\n", "Login successful\n" ] } ], "source": [ "!huggingface-cli login" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "id": "ApM7SBBgLOgd" }, "outputs": [], "source": [ "import numpy as np\n", "\n", "from datasets import load_dataset, load_metric\n", "from transformers import (\n", " AutoModelForSequenceClassification,\n", " AutoTokenizer,\n", " DataCollatorWithPadding,\n", " Trainer,\n", " TrainingArguments,\n", ")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "id": "rKYYK89sLOgd" }, "outputs": [], "source": [ "checkpoint = \"bert-base-cased\"" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "id": "aRBszIX6LOgd", "outputId": "42335f31-24da-4827-c503-6c4c4c13acad", "colab": { "base_uri": "https://localhost:8080/", "height": 804, "referenced_widgets": [ "8a2a8e3747c84661a6f53b72c11c5cc0", "ba4143fed4c746548d909b4c7b46be7b", "4b2b57e2a1e04ba9b73ba8a2820dcb2b", "e0a9cb09969d47fd80bf2c7300238a48", "1e4c9ec67eb24f6b8a6f6652c3c1258a", "837c08e4dc3a41cc92a7706dea21e424", "915dc12a423942058e32fcdd2efcc3f1", "675a9ec3bcd247eab4ae77994330b168", "2b44e332295e44028ba0ded438e8a494", "b135d0d9ec4e423fad7b38326ac470b9", "080dc21d96c345f78add86b07c1f4f81", "30eef94e516c47c280eaf5f753287aaa", "134dcffc9ba44614a72cc428ae9c49d8", "6d3298733597460fa30743ef760f385f", "c00a07e990df45c695012b53725c88d5", "580ad9e0b89d435a9cfc610d502afada", 
"e57051b2ad2b42c1ba89abd70e9c41f7", "f8182ab1142041499af781c33a1c9d99", "813358015f054a979e807d2bf6cd8037", "4efa766a33bd4f16a0d728e97fe4fd5e", "5195cf47faa7483fb972b4d9310ca145", "9cfef33ffee24791886b402f9cc0e926", "ba4bbb032bab4043bfa134bfc514e3e5", "b1ec07dd78534132862f7ebbe79b628d", "8fedb67f96614a9c84a2e8f3703a5de0", "ca9a8a13f4e44de5aac22713db398921", "ddcae98ec5c8461f9d24b58f186d55fb", "acf72b656f684f45b9871f3575972ab6", "3503c8c702074fe2906bad0ccb4f0836", "190d82e27e224ab6bd19610d95a7895a", "ab835a6af2a24eb8839bab0320fae667", "7aeac18f73c94e989611be8e97458ceb", "8d708852f7e54f028c2d9c5e83a59a0a", "41d5b07136d04c59a9672a3cb7936798", "c66d8dbf0d504e76b259e4d68347dc00", "b9a9c154f16f46bc8e828e5f8ae18983", "c30e0bc75a024a07ac7e92e393e2ea29", "97bea54408684b4582f5f61c5ae20ab0", "358700fa59464f1fa830ddebe3273c5a", "393440e511854a7dad772c0b8027c00c", "c5909ea71b1f4803961e85487342133b", "f6a1257dbd764cafa64b98ba6cfdcd02", "ca900669d5764996be2cf4df9d5da1de", "71650f66768a4a179bc7cd62d739c280", "661aca2ddcfb4e02876ba3f66a362bfa", "70fef1d27d3f43e481c258926e58e0c7", "c92e7a64084f4814a7337f57c0fb3afe", "5e3a4bbdac384db79c5ffc5c1119089e", "7622a231f61c4629a4d9d530664eed33", "d76a9eb9268c4085a08ff7094067b6cc", "beda43b0df004494b4b66a9797bfb527", "2ac61e95bed941a88bbd67e7e63190ac", "ccda23926401444bbaf8355449c16ab0", "bb6c7a0708284817a68206089f9e2fa2", "2ddbe64119da4d3d819b15b2bde3b8d5", "a3918fbf949f495d8a3a289e971a4268", "e7027777c3f14583bfcf121c73c3dc2d", "c592cfd3bd5c4d1598ad9256db715c8b", "411a9d4130bb4ab18445f6ab18da025d", "1d67381d7cf742d7bc726d274b5a67a2", "3cb42235bf3d4e9b894efb70269c7256", "17424667cf874d44808bc121f0a67cd6", "135d450014e44124b9baf77c342bbb67", "67412bfe3f4f4e158ae6a57eda96fc5d", "0e65e697e48f4ae4b01e5d9906fb5f93", "6df0a315159a44a6b3f144a90f1df71a", "dc48c4ef88214f24a4f0a5bfdb3bb72e", "5ef0d795c8104d19b430ffd62f950acf", "49e5c604077345dcb88bda303c7554c9", "0669cf337dca470aac8744ea0bcd2979", "1297bee776aa4d718a8aa27517737255", 
"6b3964b5ef8142068e9276cbb831b7dc", "f72d7a937bd54fb5874c51e5b2c92359", "acc84f70c6204e32b04e846b157a6323", "b8ddd88b7580427e885a8434b767d0c5", "76dd9a523bb34132af9c494fdae3af45", "92e37f2113b14aafbf615d11c9ab1845", "d7f3164c40224d10a41f10e1ad925c63", "87b310fbdddc4a2f849e84d3226e26a3", "5ec1f0a02b6e4855b6ed19930a6906b6", "490f7dcc6a624ba892c00a10eebde001", "eb3aafb6d19d4c4091aef8683d41dc71", "778c9d559f494d288ba335bf822104a8", "3e8f80617759404986e4dc16f17a3dcc", "10ae020986234e26b5b99e0634bb5a25", "e0ab07038b2a4a38906d17f621e756d9", "13ff733a6ea7481bac98d3f029ff4095", "2af943e214a94fe0a3566cee702b4732", "45ecdecc5cdb40b28df9255c01a52d84", "a04d6e727e1742f1bef215b2515004b8", "a28f875369e042e6a9e2805a906b24d9", "c055f850407f4aa0a544d72894f4cda5", "19b3c5311335482d85bb906e27a082db", "fb762a33086c4a36a5f174c9812ab595", "61bb76f4f7e641c5b2655a2450e4fdeb", "d3d6ff946a89457c8595e7e5c50ed473", "d8cecadd801242d9a17cde7e06e715cb", "ae567717d7844ce4ab0bc789b9730a3a", "153be5a84b5f491aadc7b26b9943bdf3", "d36ae587d48b413b8e180c8a086a1db7", "84c2639934b345b2bcdf35f75f3c8320", "15977ea6792046b4a4e7ca8f7c1093de", "8142d410f52e49768adb4f24e33e01f7", "15ed537104404a669e8efdf877f8538e", "9a656f051b134fdcb34aca1044f54078", "951ea38dd459489f91bbf30ae13a97e6", "dc09c077a5dd463a857bee238c376d2a", "88476e6bcb5647069adfab22c7e0fb14", "b9862f6ddf2d4f088807b8b972f74362", "3a68526ee011454fb8cdf4683f152b6f", "8e9a975126f24b4e92e525db74847e4f", "98c46d314b504b73a56a64c354913521", "2a6f063a57a54f37b40f1e6d99550b20", "eee5c06bd6dc4a18bb868a817c0ec948", "ae741359f9d1409e82c05882c0b9dd4d", "6a81b201ef4341bbba605c3f67ef5812", "95d933f2b4c04cef96a51e96e421e7b0", "e552c4fb6c324f03a65fc43d89bf01ac", "2b877cf6d08b48e989eda28b56b9fa51", "4721a0d40ef64fa0892f11ee4ca00a3a", "0d10d20b5f8543fe9f7465cb53003f20", "cb20508119a846c58244f5c4edf667be", "fe3fe3dc0bf640b780c2adf4321b99bf", "00460090c34a4292a8fa683f94d96017", "6e9984e736d84ebc92b64f861b6e750f", "87b7e7b772784275af1969110004a4ee", 
"5919ca09a66949b894134a1f22f29067", "9f80bca8570245b19486199ac4d5b557", "23143ad95e404f7a86b42f4e0351b21f", "734acca574a94b1da5c44615db2bd486", "5f6019146d2343008657d32f70700d52", "e0a98f923771414e9cb98f1f9ff177a0", "aa46efa5ac72493092106d56c301dce2", "73012d05d3b24fbf92bb0567a13b76da", "c96585ddf9984c139ebc7e3611adf3b1", "56477260b4c347098e1d60fb7aaf8298", "9224df7206e946d39f865f589dc802ef", "02b3d9b0f99246a792a95d6861681c0c", "ee6ad2656e8540ab8166cd580e7491d7", "9b11cef7dbb24872918a2c273d3638b9", "386efee56003465abfbbed0743a6a163", "2170ac0086af496785afebfcd479d9c7", "7b981ad194bc422e9dd911fa88f3e445", "5f37728b96884d52b8e402b0a88f4caa", "49707e99ca044000b481720d60095345", "6933a0116e8c4862a837545d6b3cdc49", "2436d5407ddc4892b4844ac6ab541092", "cd27e7ef62104e9eaf19ddf9ba9e79fb", "5c73f5badb5349f9ba6e16739d3563a9", "66a6c682492b40c98e3c38311b506ad5", "ffe2cdf878f746b6b5e9bc60cb0e6161", "5b2c2b05ce894ccc93ca236860ec6e8f", "e38042e2583b4aea9c84ba40087bc3fc", "e435f1fa5cf44255bdec8ef2d3ee6e82", "b6e68f65c0bf4456a81b5aa3afca59ae", "9b964d92dcb64233b63062fc7a1dc9af", "7803229e31fb4fbca8d7a8e49cd0ecf3", "9c904259f08a4710838d7bb621a05420", "80f946dcbc4e48429d4eee1fa7ee4ec3", "6506b90feebf41729c5b32f590d8540d", "1331183281a54a0d9d45e1824346ff93", "b28d17647f7c47d8a17a37575c39331b", "3a1c2991dd044e61ba74fc12b008aa4d", "e92013c21a4244fa8a6f715ad7940e1b", "96b2da1e47b34cd79d4e9454ca450637", "3c3957912ca148e6aa3a5a693805fae2", "4532f9674991474493249fcb092f709e", "3b8130fde37c49419744bb6d5302cd27", "88894ea4cc274412a56651329173a6a1", "9fc2c288771f49f994cc069b0b0bdbb6", "0493a8283d7d47d8a8d69613dce7a37a", "582e90d77c6c4e9496fb3fe22dc0b0ec", "4af02343e9ed4e45a995400ee30899a1", "8b350d6bc0e444f0b75b9eed59f5b2b1", "79928db5ac8742e29fdba6993892fcd9", "dbbfae50b9c84206922979ec7cd1b4e7", "9d6b42ab35bd4fa2ad5be4c4fd39c855", "506f1e48a7974ba48b79858a18fc950b", "0c9c83ca3ff545fbb9503cd49f635eb6", "8e95833771714c079d4506ac4058c2a4", "030681451d0645f79a6c8d48c653f84e", 
"a5d042faafd549c182ecd8618ef04d9f", "5a076001356b4d7996674f16fb6c350c", "59a5ad12457d4f4bbd12b1ff7aa746a8", "7932ba1ccea04d2587d58b4b496f3519", "ff7a67c450fb49c1a8e2b69ae4f6e230", "6151d59eadb94d31a44d3e935f54dd41", "ebe85cc5112c40719198664763e511c4", "6ddb1961d8ab4a0ea6a8adc7b5633dfe", "6d68c54bff1a4c57a7443082a10476c8", "9caafb64272443cc8e6512fe17243641", "91ba97a4a94f4b9aba0b240e36a8355f", "3780ffc9f5e54ac7b18b4b7c27308446", "362ab0c23dd54c3ba83d0aa3cd32e33c", "e2be7abe45a0497e9045beae78c239cf", "191ccbdfbd3e4a21a10386ffe3b49489", "3817062330ef49488f71beb7a737aabd", "f6e16ee7684b4e91a447f0b19c12e1a9", "89e564b45ec64a0daaa544aca7bae379", "d3fd275dd01d44549d8ce07dc83a7f17", "dadb5336371042ed8ae328e2e154aac1", "9d61a6194b794f9093280957602a6097", "383e5b0124924f849325182bcf253dfc", "b19a6c24884d44d696fb7814d8910647", "3d34ce7ae6fd482f91bfbf3b1019b0a9", "1ff92fae82644e70a1200ec1c87b5745", "ca86506d29164818b002a79ae6f3a5f2", "000baff64b994c6b8307e6d2e8e3f6b5", "0f2435e2e1244cc99b3e1bb0b02266bb", "5007e6402dff4f969624a72209651487", "5c90cb5b004d4d45b63206e6e4777c26", "954505fc69ba4c40b78154f9cb4a1c1a", "e7369a0de4e747efbae2f9703e07225c", "228879f1834a4d55aea9cd3da8f2cb6a", "e3d0f5c6fd354eae9729df2da00c0834", "8fb14fb73a784de092ec4395be71683e", "addcb03b894d455bb1af5ac55ac3f833", "47da4f1811c8448fbfaec064631f3aa6", "af58882929944b59aad36bde8a7a2c69", "69690965aa2e4b749beae0b8318f0897" ] } }, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "Downloading builder script: 0%| | 0.00/28.8k [00:00:29: FutureWarning: load_metric is deprecated and will be removed in the next major version of datasets. Use 'evaluate.load' instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluate\n", " metric = load_metric(\"glue\", \"mrpc\")\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "Downloading builder script: 0%| | 0.00/1.84k [00:00" ], "text/html": [ "\n", "
\n", " \n", " \n", " [690/690 03:18, Epoch 3/3]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EpochTraining LossValidation LossAccuracyF1
10.5315000.3697700.8382350.886207
20.3000000.3677360.8431370.891892
30.1575000.4148240.8602940.903553

" ] }, "metadata": {} }, { "output_type": "execute_result", "data": { "text/plain": [ "TrainOutput(global_step=690, training_loss=0.32964635655499885, metrics={'train_runtime': 202.7687, 'train_samples_per_second': 54.269, 'train_steps_per_second': 3.403, 'total_flos': 446187016567680.0, 'train_loss': 0.32964635655499885, 'epoch': 3.0})" ] }, "metadata": {}, "execution_count": 7 } ], "source": [ "trainer.train()" ] }, { "cell_type": "markdown", "metadata": { "id": "MQa7yl7kLOgf" }, "source": [ "## Push to hub from the Trainer directly" ] }, { "cell_type": "markdown", "metadata": { "id": "CkowGBeaLOgf" }, "source": [ "The `Trainer` has a new method to directly upload the model, tokenizer and model configuration in a repo on the [Hub](https://huggingface.co/). It will even auto-generate a model card draft using the hyperparameters and evaluation results!" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "id": "xQmXMR2jLOgf", "outputId": "d1fdf664-81e3-4bd2-d6a0-e082bdf6f85b", "colab": { "base_uri": "https://localhost:8080/", "height": 447, "referenced_widgets": [ "ed62064d1b484620bba451d48b68f078", "3b01f4b823684aa2a6fd51cbeb6020c9", "7b4f130843414556ace79de1068bee57", "86d0ffc1f3064fd0b4d8f431faa7bd68", "030732c84c24446e926f63b3a7ec7224", "37a796adaa8b443eb07ebe90a4c013a2", "b80d047986b24aeb94c8fb4be962b866", "a14a89661af046b28522d0c3a770ee69", "03485fa701e24664bf78b1fbfe1cfe92", "12e19733e4434180819cdbaf7efdc932", "d107c89f9f854f548408018b8bd4e619", "e9be74cfb91f4f0c847392506e0b8bda", "a048e08a2b3347baabe8eaee57d9c1b8", "b56ba071bb8b433baca253fdf0a6a9be", "38e42829635540ad896f4f1b32423391", "f72c400eb6d24854a2d39918523ad685", "b5a58d299e9f4a66b9ed57c8c40347e6", "53b8181f32be43519b8f428c9f59c325", "746afe1d78d64acfa6287295620574e5", "15b68a98df5245ed8b7de01990b386ae", "83b8c2e67b6e42c6a8ae5781f64498c4", "a21edbe135424176a231c7f8ddc743c7" ] } }, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "Several commits (2) will be pushed 
upstream.\n", "WARNING:huggingface_hub.repository:Several commits (2) will be pushed upstream.\n", "The progress bars may be unreliable.\n", "WARNING:huggingface_hub.repository:The progress bars may be unreliable.\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "Upload file pytorch_model.bin: 0%| | 32.0k/413M [00:00 main\n", "\n", "WARNING:huggingface_hub.repository:remote: Scanning LFS files for validity, may be slow... \n", "remote: LFS file scan complete. \n", "To https://huggingface.co/charanhu/finetuned-bert-mrpc\n", " 6adf313..b791847 main -> main\n", "\n", "To https://huggingface.co/charanhu/finetuned-bert-mrpc\n", " b791847..225773b main -> main\n", "\n", "WARNING:huggingface_hub.repository:To https://huggingface.co/charanhu/finetuned-bert-mrpc\n", " b791847..225773b main -> main\n", "\n" ] }, { "output_type": "execute_result", "data": { "text/plain": [ "'https://huggingface.co/charanhu/finetuned-bert-mrpc/commit/b7918478a19d91e87fe6a75c169e623d1c82e38a'" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "string" } }, "metadata": {}, "execution_count": 8 } ], "source": [ "trainer.push_to_hub()" ] }, { "cell_type": "markdown", "metadata": { "id": "wbPD-5cSLOgf" }, "source": [ "If you are using your own training loop, you can push the model and tokenizer separately (and you will have to write the model card yourself):" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "Xr9gYHhdLOgg" }, "outputs": [], "source": [ "# model.push_to_hub(\"finetuned-bert-mrpc\")\n", "# tokenizer.push_to_hub(\"finetuned-bert-mrpc\")" ] }, { "cell_type": "markdown", "metadata": { "id": "ETt34170LOgg" }, "source": [ "## You can load your model from anywhere using from_pretrained!" 
] }, { "cell_type": "code", "execution_count": 9, "metadata": { "id": "KX6ZN6JLLOgg", "outputId": "ec2cf291-f55f-42f1-fe9f-c27124e253b6", "colab": { "referenced_widgets": [ "5cf1e046f2294330a962c24d04934c85", "27ba0ded4dd1420faed7d5bdadae7115", "6ad584d7e5e244d49c84e9f7fd6efd6b", "b608e83577834ef49a6edec00824c7c0", "ca61fb9b86744edca8ffa393fe01c4cd", "ff9c760959ad4cb4b09f8da96778d8d9", "f6b5403c769143f688201afaf05254e5", "3cf335a6b89c48c796d53c6f65c99ad2", "8d2b8e2aebe94ddeb71632e790080f6f", "6831b32fc228496f8615ec9dd92ce843", "cb6cb92d1493427888dc42200a65532e", "d11799c1201b43b7b2e34dd71687d293", "a17586e4e65c427ba06bb1d3095a5462", "1204139c01744b448c0fc815948584e2", "36758054a6d94dc085e6135438c9cac2", "5b9764ec211f4c23a984540683f38379", "3c6fa49b5703453583cba12dd16a6377", "72c3975fcbb144bbbfad60c9124963c9", "f3a2a45b84d149438b4b84c643df7dbc", "8418413c95b54f5aa6584dde12254f51", "4058da21c8ac4d8f81cb7542e6bc871d", "43a00d3e17b240398b830334547bd7d6" ], "base_uri": "https://localhost:8080/", "height": 81 } }, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "Downloading: 0%| | 0.00/725 [00:00