{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "Q-bj6K7Qv4ft" }, "source": [ "# Fine-Tuning a Generative Pretrained Transformer (`GPT`)\n", "\n", "1. Install required libraries." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "SBWCrz5GfBXo", "outputId": "21d0811e-0f41-48d5-8f51-fbc196557d0a" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/7.6 MB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[91m╸\u001b[0m\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.1/7.6 MB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:03\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[91m╸\u001b[0m\u001b[90m━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.6/7.6 MB\u001b[0m \u001b[31m52.8 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[91m╸\u001b[0m \u001b[32m7.6/7.6 MB\u001b[0m \u001b[31m86.1 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.6/7.6 MB\u001b[0m \u001b[31m62.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m519.3/519.3 kB\u001b[0m \u001b[31m49.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m179.8/179.8 kB\u001b[0m \u001b[31m23.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m268.8/268.8 kB\u001b[0m \u001b[31m33.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m100.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m83.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m16.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m23.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m18.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m66.4/66.4 kB\u001b[0m \u001b[31m9.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.1/53.1 kB\u001b[0m \u001b[31m7.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h" ] } ], "source": [ "!pip install transformers datasets codecarbon -q" ] }, { "cell_type": "markdown", "metadata": { "id": "y5XnfvSH7w4z" }, "source": [ "2. Load the data from the hub." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 1000, "referenced_widgets": [ "adb95b02c40846dfa58ce064dacc619c", "5e01c30216e34c4b927d6a726f045bd3", "bc45595ea106496395d779280cfdaa76", "f356b84795414453b3b98e5166587e0a", "baa95c6db6ce46c09b61c2e4d716d106", "04b7cf19869148adaa3d623d9d2e1672", "49363eb1926e4b88bdfeb52e15d1c3e2", "c135f2f4f23e4a21af7fa727961cd024", "5b0c19f417754de28b65427f2da18d0d", "0748633648344d4abdedfe35722f1cf3", "f3ec6bca583c475ebb006c44c5f50c39", "1f3288dcb004450cbbbf216d6e64766d", "852751723d4040b2898d754dec3909a5", "c72871c1abdd4bdf9f929d37726352e4", "275eb8962d5740438787f464b8f9c3e0", "8d3f44a1a3fa40c694b3c21857eccbbd", "792f75c192124a62884b83fc5c60fcc5", "75773efd563e410b9d02fd928638cb7b", "bc260fedcfaa4d20b534b40c1df9b3c9", "b98bda41f90b477e999885b16bfdb422", "ee3bf5cf1599486ba4da72e60f4b5098", "8dd90992b7ad4bdc92d49191ce65865b", "73535961abfb44ba84a5658a0dc5793e", "db2bcba8c39f40ae8bd1f9a7e0f576cd", "d07612b449194e98aa55ce73116cf3e2", "0b7e9bd63df94f20a4fff2ddde8bca39", "bcbf71c3e46741daafc8f225b4be6124", "05e1b501a9694db4a7b28fc73cf26e5e", "128cd4483883472fbbb623ff858ed527", "6e4a87a7d3ea4552ac678820cbd3326b", "c3440ae15ade46f3af428955a148fb72", "9704a435b0224849a859439b4b6cde1f", "8c4269098c704cefa4b545c9a266c360", "2fa9e5fff95644729262565b95203613", "d4b9c0922fc74ec79ab8f36f1c043567", "c2233388898c4b50ba58215ce74427e5", "78d3ef038d0443679310455d5023ec57", "5138ce28fe604221bcd333e328152a6d", "f76eafe90c094433a8559a36ea443f1c", "2dd94310758b47e5a7ce7c382b75a940", "5fc94a020ece405fa5e7f175dd83c68a", "290e11cb61504da69da83f4120a57aa7", "f00829beb3954add89e757beab97e5ca", "67640eccf30f45a1abb523febd0980f2", "f05c4c369f7e434bb68a48c07a6f289c", "b531348167a449f5aa73139e4f3455a2", "26a398beecd74248ba3f0c1b3b71373f", "7438f54ed8c54f8db9ffce9b62bb31d6", "cb990dcb8f924e96828b2fe41d17de04", "03b72b157a7747fbb673eacb03d3b055", "1aa1b645deed4a0e9bdaf26b931eaaab", "bbb33e1315ed4e32b53b063b7f0e3906", "98e3f4131af8473eb49681d461a075b3", "ec4f554258744524a8ef0ee75400178b", "329ce55a8c7a4a1ea03e2e26fb749d8f", "7af7467aeaa343908e190a3e27565c9e", "09919332f4c74e5296f9d760b22cfa6d", "7fad9df6fd6542629334ca23c018ab30", "b13c3fb73c5245f3a05a5a636e00e263", "6400b4dfd2214d8b86a47403da880640", "3fe7aacc696a42f8b7c229aec6d1019a", "020b46bf0184444a9b771a4178471d20", "672c3f8920114c3b95f6ac5b8abf279c", "4634dc8baecd4e1ab820e33a94d712cc", "494f9cdcee284160a5f3ff515a33bda9", "f0fb5681e3a7406caddbab328dc0d101", "da69e26d69d24ebb9c3a7ea9e212bd5c", "01889c59797e4a61a3d0db186a20e032", "fd9581f1729347fc940e45e162bed4cb", "7471c227abee451680217c5504303cfa", "43f3ad94a37642269c0d99a8e975a6ab", "1bc9a8e4962942b69a9dd93b366f6119", "0e612446f35b4503bcf2969eec277a3c", "a1055ccc12d8414a98dc7045052d8ad8", "58735352e86b4a48b896937297a39ab6", "ac54dceee9c74bfab3b70f515fa44c59", "51f382e4a4d04001ac1d15c672c26b51" ] }, "id": "7MbpXGu-v4f1", "outputId": "d67fd0a7-efa9-4f6f-8670-6aee747e8bf1" }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "adb95b02c40846dfa58ce064dacc619c", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading readme: 0%| | 0.00/471 [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "1f3288dcb004450cbbbf216d6e64766d", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading data files: 0%| | 0/2 [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "73535961abfb44ba84a5658a0dc5793e", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading data: 0%| | 0.00/26.7M [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "2fa9e5fff95644729262565b95203613", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading data: 0%| | 0.00/29.5M [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "f05c4c369f7e434bb68a48c07a6f289c", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Extracting data files: 0%| | 0/2 [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "7af7467aeaa343908e190a3e27565c9e", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Generating english split: 0%| | 0/41815 [00:00, ? examples/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "da69e26d69d24ebb9c3a7ea9e212bd5c", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Generating portuguese split: 0%| | 0/41815 [00:00, ? examples/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", " | prompt | \n", "completion | \n", "
---|---|---|
0 | \n", "Por que os camelos conseguem sobreviver muito ... | \n", "Os camelos podem sobreviver por longos período... | \n", "
1 | \n", "Por que o celular é ruim para os humanos | \n", "Os telefones celulares como dispositivos tecno... | \n", "
2 | \n", "O que é um polígono? | \n", "Um polígono é uma figura geométrica bidimensio... | \n", "
3 | \n", "Como começo a correr? | \n", "Começar uma rotina de corrida pode ser uma óti... | \n", "
4 | \n", "Quais episódios da quarta temporada de Game of... | \n", "Michelle MacLaren dirigiu dois episódios da 4ª... | \n", "
... | \n", "... | \n", "... | \n", "
41810 | \n", "Como posso ligar para você? | \n", "Você pode me chamar de Aira. Como posso ajudá-... | \n", "
41811 | \n", "Qual é o seu nome de identificação? | \n", "Como inteligência artificial, não tenho nome d... | \n", "
41812 | \n", "Como você é chamado? | \n", "Olá! Meu nome é Aira. Como posso ajudá-lo hoje? | \n", "
41813 | \n", "Como você se apresenta? | \n", "Olá! Eu sou Aira, sua assistente prestativa, r... | \n", "
41814 | \n", "O que devo dizer quando me dirijo a você? | \n", "Ao se dirigir a mim, você pode simplesmente me... | \n", "
41815 rows × 2 columns
\n", "