diff --git "a/AIRA_FineTuning.ipynb" "b/AIRA_FineTuning.ipynb" --- "a/AIRA_FineTuning.ipynb" +++ "b/AIRA_FineTuning.ipynb" @@ -19,7 +19,7 @@ "base_uri": "https://localhost:8080/" }, "id": "SBWCrz5GfBXo", - "outputId": "34bbeb89-a5d5-4ad4-f87f-a87bffc735a2" + "outputId": "c3897ecc-56b3-48fc-b9cb-1f8bb2809fbe" }, "outputs": [ { @@ -28,12 +28,18 @@ "text": [ "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", "Collecting transformers\n", - " Downloading transformers-4.30.1-py3-none-any.whl (7.2 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.2/7.2 MB\u001b[0m \u001b[31m77.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.12.0)\n", + " Downloading transformers-4.30.2-py3-none-any.whl (7.2 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.2/7.2 MB\u001b[0m \u001b[31m78.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting datasets\n", + " Downloading datasets-2.13.1-py3-none-any.whl (486 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m486.2/486.2 kB\u001b[0m \u001b[31m47.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting codecarbon\n", + " Downloading codecarbon-2.2.4-py3-none-any.whl (176 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m176.0/176.0 kB\u001b[0m \u001b[31m24.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.12.2)\n", "Collecting huggingface-hub<1.0,>=0.14.1 (from transformers)\n", " Downloading huggingface_hub-0.15.1-py3-none-any.whl (236 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m236.8/236.8 kB\u001b[0m \u001b[31m30.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m236.8/236.8 kB\u001b[0m \u001b[31m31.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.22.4)\n", "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (23.1)\n", "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0)\n", @@ -41,52 +47,56 @@ "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.27.1)\n", "Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)\n", " Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m107.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m115.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting safetensors>=0.3.1 (from transformers)\n", " Downloading safetensors-0.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m80.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m87.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers) (4.65.0)\n", - "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.14.1->transformers) (2023.4.0)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.14.1->transformers) (4.5.0)\n", - "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (1.26.15)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2022.12.7)\n", - "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.0.12)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.4)\n", - "Installing collected packages: tokenizers, safetensors, huggingface-hub, transformers\n", - "Successfully installed huggingface-hub-0.15.1 safetensors-0.3.1 tokenizers-0.13.3 transformers-4.30.1\n", - "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", - "Collecting codecarbon\n", - " Downloading codecarbon-2.2.3-py3-none-any.whl (174 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m174.1/174.1 kB\u001b[0m \u001b[31m9.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting arrow (from codecarbon)\n", + "Requirement already satisfied: pyarrow>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (9.0.0)\n", + "Collecting dill<0.3.7,>=0.3.0 (from datasets)\n", + " Downloading dill-0.3.6-py3-none-any.whl (110 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m110.5/110.5 kB\u001b[0m \u001b[31m16.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets) (1.5.3)\n", + "Collecting xxhash (from datasets)\n", + " Downloading xxhash-3.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (212 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.5/212.5 kB\u001b[0m \u001b[31m27.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting multiprocess (from datasets)\n", + " Downloading multiprocess-0.70.14-py310-none-any.whl (134 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.3/134.3 kB\u001b[0m \u001b[31m20.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: fsspec[http]>=2021.11.1 in /usr/local/lib/python3.10/dist-packages (from datasets) (2023.6.0)\n", + "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets) (3.8.4)\n", + "Collecting arrow (from codecarbon)\n", " Downloading arrow-1.2.3-py3-none-any.whl (66 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m66.4/66.4 kB\u001b[0m \u001b[31m9.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from codecarbon) (1.5.3)\n", - "Collecting pynvml (from codecarbon)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m66.4/66.4 kB\u001b[0m \u001b[31m10.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hCollecting pynvml (from codecarbon)\n", " Downloading pynvml-11.5.0-py3-none-any.whl (53 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.1/53.1 kB\u001b[0m \u001b[31m5.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from codecarbon) (2.27.1)\n", - "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from codecarbon) (5.9.5)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.1/53.1 kB\u001b[0m \u001b[31m7.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from codecarbon) (5.9.5)\n", "Requirement already satisfied: py-cpuinfo in /usr/local/lib/python3.10/dist-packages (from codecarbon) (9.0.0)\n", "Collecting fuzzywuzzy (from codecarbon)\n", " Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl (18 kB)\n", "Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from codecarbon) (8.1.3)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (23.1.0)\n", + "Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (2.0.12)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (6.0.4)\n", + "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (4.0.2)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.9.2)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.3.3)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.3.1)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.14.1->transformers) (4.6.3)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (1.26.16)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2023.5.7)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.4)\n", "Requirement already satisfied: python-dateutil>=2.7.0 in /usr/local/lib/python3.10/dist-packages (from arrow->codecarbon) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->codecarbon) (2022.7.1)\n", - "Requirement already satisfied: numpy>=1.21.0 in /usr/local/lib/python3.10/dist-packages (from pandas->codecarbon) (1.22.4)\n", - "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->codecarbon) (1.26.15)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->codecarbon) (2022.12.7)\n", - "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->codecarbon) (2.0.12)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->codecarbon) (3.4)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2022.7.1)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7.0->arrow->codecarbon) (1.16.0)\n", - "Installing collected packages: fuzzywuzzy, pynvml, arrow, codecarbon\n", - "Successfully installed arrow-1.2.3 codecarbon-2.2.3 fuzzywuzzy-0.18.0 pynvml-11.5.0\n" + "Installing collected packages: tokenizers, safetensors, fuzzywuzzy, xxhash, pynvml, dill, multiprocess, huggingface-hub, arrow, transformers, codecarbon, datasets\n", + "Successfully installed arrow-1.2.3 codecarbon-2.2.4 datasets-2.13.1 dill-0.3.6 fuzzywuzzy-0.18.0 huggingface-hub-0.15.1 multiprocess-0.70.14 pynvml-11.5.0 safetensors-0.3.1 tokenizers-0.13.3 transformers-4.30.2 xxhash-3.2.0\n" ] } ], "source": [ - "!pip install transformers\n", - "!pip install codecarbon" + "!pip install transformers datasets codecarbon" ] }, { @@ -104,17 +114,308 @@ "metadata": { "colab": { "base_uri": "https://localhost:8080/", - "height": 467 + "height": 702, + "referenced_widgets": [ + "34e200081e97446c8cd7856137d2ed39", + "3e9822a5f0d046728156b5a0c3e3c082", + "6c0f0718a27944a49df4feeb5eb7f0a6", + "94ce8d2f7bf24a3cb3487ea88094beda", + "579d1717cad5475e9f772e84bdbabfc1", + "1120e50796944b58af0f4dbd639482d2", + "53f08c1231c74d14a81bd9485ba96deb", + "5456362a6850407db4bebc1ded3cbe1a", + "f3aaec6e08094e19bff0f311e6a64158", + "485ffa98fc3a473096e4af30be5aa60a", + "78fa4f26ce174a108c43033b0dd5f2b6", + "158650cfa6d14e00ab4825953fabe91e", + "16d718b78ba240d7b930ff422c9cbc67", + "1092f96bf4b2456893e925e26d200d57", + "02a61e2a34b3458a8b9313608812ca7d", + "a6d3f7f2b090401f9cba84ccb242c5ec", + "f1f0f70d41b64c4892da63d0aa6ec3c9", + "edb44c8f27af400cab27572250043b11", + "9b37e0bc0c91405a9ce5a925fb46ba7e", + "8e07a97bcc0548fbb8681ad1bbd1dcbe", + "857f65e34e8a4cee92857a61fb3febb2", + "e0f81131b319404d8f48a1ba4edac5bc", + "a39380581e804f5fb96018c6058ec4ac", + "29e76265e50842c3b3aa3e68825c1cc9", + "4622678ef1c440eaba2b21dabaef8a4f", + "dac43af304db4a2da06c7b18201413f9", + "00485f3c3dbc40b5be49c8f4fd49c3d7", + "095609fe73cb4e9cbd5838422855c951", + "7bc85c49a9154847b716aeaaf47d4558", + "aba8b6027ea447a8b927b8674a3c78c7", + "54995b0a7e01403d816d4c5dcea3ec39", + "9cfd52a8054f436fa406dd12657d697c", + "380d234d63644d31b43286726d4346e0", + "1962a1b46f7d4c0bac70d45435b1b0b8", + "476c1fd9e1644ceda44275a4630cff71", + "c56b207b5c554646b09ee80b6c6f87cc", + "295050c536e04607be4944fa8827a66d", + "7848eb314e39456faac551687946b12b", + "9d9e50a4097f4aa38e07d69728e47298", + "7b3c5ea2a594482a9d6e60510d9e335f", + "8c988598365c414a9fb20a7f6b37365e", + "0377a5e70f2540878f435c3c13b915fc", + "20ec5a844483409f8df19a37e41cb0c9", + "d5b5905850d74cf8b1e8f2625658b13e", + "380e286c3a844a91aa34bb28439cc430", + "5771f51108984384a0d4a458320f74f3", + "c0ecbbd2b8c3474f848a017f34a1bbaa", + "41260544bc6c4d25986e0819ae970219", + "09b056bc424b4322ab06b9a5adc53f8e", + "bb2e0757b2f549b4af6695711ab9b3f0", + "57dc02294d124354a292ec6df2d3e95e", + "e7e1e30d208d4179a09f66c47d2397b2", + "067fcad17d364367ae4047fdb63ef59f", + "6c0b0dbd3d6b46959bcab23a67319f6b", + "c62afc791f8241eaaf41e7f3fc84cd81", + "d8e16763af2a46f0852402390bb7ce96", + "dae11f5653c643368c98ee4bcf2d8991", + "0a71aa8a8a394e4faec5deac27fcd796", + "348e188ace234032bd59e83782586986", + "123686d65ba4401c95d67007f30f502a", + "12149f11baaf41aeb063d336df1276bb", + "c943036f9a384fcd8e6cc5c19b3883ed", + "95350ac6a725432892a41949a7af1cc5", + "6b421333f2b64bd29d0dadca8de348f7", + "a536609d06b745818bb6da478dcc62c3", + "7bffb7e759f2478e9da398d30d399474", + "76153959dfc74095b0412eb96a2a6b57", + "5741589d2c0b4149ae8ef52ba6262002", + "f2c3bde660014bbc8885ff8f133ad0a8", + "521f6b271d504c59971e181b4c6087ce", + "524dee0cb1b145b1a1f7a22f4f77c20c", + "f47d8748244b4c0e8f69e62960b60114", + "b2569b4d9ee340f2a042f3fcf1a9479e", + "df88461423e0442dbb5db3032685cee4", + "430d29dfadd04147ba3a0e15ffa37e00", + "b22bfb10324d4d138c4e48bb853e54a6", + "853ab6f785124189987407cebeeb36ec", + "3cca4d9f55024c8e950a1f011d2a9e71", + "746b4ff75fe549d294822ac0c17b9eea", + "094bfddc08c143adb3ac8f92b8d8a35c", + "b78b1570f36f4591aa565d06d5c97aa2", + "0edd2a6262c84cedb8639689f12e3cb8", + "00b3c8a77c174b33ac6df812bd38caf8", + "c585a27add9a4d28b4fbfbfd0d546639", + "33c26d62ae80471d92d40b19729ea0a1", + "6e0bd73d8bf34f05ad9337d44f683f69", + "943e960e06fa4e179f0f8f24d80c75c5", + "9c67ef19d64d4496baa7a3ecaa65ac40", + "093d5f2f31d248e49b53e13e34a39d19", + "56f5e6f6f1994855b71defe58c3caf73", + "6503cd0b1071414cb852faa395891b66", + "3fa36692e1154c08bf1bf50fa4a276f4", + "48a7142c00144de1b49a1c60166332c1", + "54b3145407814cb6909b6a42e0d995c1", + "b7d2a893077a42578d701d5e2cb04146", + "3ad920375fa84ec5a1c8ef906a132f66", + "a029c61ef45d483f855f9c0360b58d9f", + "f55dff66b5ab47649623dec665bab8c7", + "f16dd62059ba49c18d57231013da91c8", + "02b3bb2540344873bdc2621a283e4b9d", + "83a86345acec4ad1b53dd850a61a28c8", + "7ef01e51e8574be18c5e61fd15885dc4", + "2a77fed3b2c44741be4b3798561701ab", + "85f79d571cf74369b95d456e3aa71b10", + "ad6e4edc3c684930bec491e5f146cce0", + "87ff0aba75554acc9ca5631a3a8eb86b", + "e1095519890e463f8bd1028784e926a7", + "b5543a0ad1da48c5b21cd21f77b78913", + "e212138cf44649e7a116a7395d574e50", + "4d792f196b3348a6afa7be27296b2e8e", + "6831afd511fc4a468023a637abef4ffc", + "0db1a095cb6f4506b57afe4478ea760f", + "68de400689c94eaaab36d4a03177291e", + "545a9aeba4934b5394f782b3f7805484", + "b2cbefc0ba8d4b869a531cbcd1dd1279", + "8ebf592281764403b1e7a71be6ab4179", + "4c480e7f0ae04afeb728c25829d85b35", + "c5b458f29c6d4932926f33171d21922d", + "fae6fa9f6f6b4273851008bde9e21e9a", + "838feb07922241239a29e6640de20f72", + "dc91135f04184e29817d1d8f26637e17" + ] }, "id": "7MbpXGu-v4f1", - "outputId": "7d155c3b-13f2-417d-a5b9-6d0dce08a5d7" + "outputId": "f0085c70-8f57-47f3-c719-16f95e40fa40" }, "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "34e200081e97446c8cd7856137d2ed39", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Downloading readme: 0%| | 0.00/5.12k [00:00\n", + "
\n", "
\n", "
\n", "