{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "Q-bj6K7Qv4ft" }, "source": [ "# Fine-Tuning a Generative Pretrained Transformer (`GPT`)\n", "\n", "1. Install required libraries." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "SBWCrz5GfBXo", "outputId": "2d5e5c69-720a-4a48-e3c8-f425b16e6010" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.6/7.6 MB\u001b[0m \u001b[31m22.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m519.6/519.6 kB\u001b[0m \u001b[31m33.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m179.8/179.8 kB\u001b[0m \u001b[31m22.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m294.8/294.8 kB\u001b[0m \u001b[31m33.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m64.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.3/1.3 MB\u001b[0m \u001b[31m63.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m115.3/115.3 kB\u001b[0m \u001b[31m13.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m23.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m17.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m66.4/66.4 kB\u001b[0m \u001b[31m8.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.1/53.1 kB\u001b[0m \u001b[31m6.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hToken will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.\n", "Token is valid (permission: write).\n", "Your token has been saved to /root/.cache/huggingface/token\n", "Login successful\n" ] } ], "source": [ "%pip install transformers datasets codecarbon -q" ] }, { "cell_type": "markdown", "metadata": { "id": "y5XnfvSH7w4z" }, "source": [ "2. Load the data from the hub." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 675, "referenced_widgets": [ "8e912129c09043b390effea94ec6bd51", "a7fc9ef94ad14bd2b158f3ca94cfb1ff", "5a9995f7d49f4a54a3b334b994bef5d0", "93db6e4b27964a9eb8536c1830eee5c5", "3284e401bc5d452b87a030e29930a39c", "43a8f7284a374b489d9d43e12e2b27c2", "13b7b700e2f54832b5d07a9891b323a9", "682b0226d3bc48aea056f5fa91dd9c10", "b9eb3c8986474f60b80c3cad6e66374d", "dc94fbbafe894ee8af64c32f41c009df", "99f2428f4efb4736a28102858900be8d", "630a07984a4044f99d5e7d4d8e1fe473", "46eb617bba4042f0a51744919c5e3066", "ec3090b36f984ae6a70d32f1445d3724", "c0ea1f788cb64b728ffd0a091b4fb609", "8d617df85593488e9db4e425e0277a0c", "cbd2bd26946446c5bd2b22195a56d1d1", "d31c02c361454b7bb3e2fd57013a22fa", "8a788ef676d94f47a1c5faa41b3cff9e", "09c64da64cf94d7784851e8a7cbe3f90", "08ae50674c134c13b2c9b105784324ea", "94729f1e87324d95ac27ab7494514062", "79951f91b806441986df0ffb40c08b7a", "dcc1cf21786e4c9aa3e06b34b8690ee6", "e27715e13a6a416991333856cf96d48e", "c75a73a2c890454e93a09cd48c5a7f8e", "5494e412fff14d6b99a465eaa2346547", "dd4f6c7dab8b4b92a1b66ec1d0302406", "69eab84d31f8479aaac5023f891a4a19", 
"887715d12a9a40cea7c6678112122fb9", "f73ce7044ad445a182d2d90a83615ea8", "f16ce79c17ff4ef0927e8ea5b21b4a39", "0c9146b8a9304ceab6c8b39b244307f0", "c4b6b313c6244323bdadba539190b1ab", "7a9a7696a7684eae8dc4d3c1e33cf16b", "0353db79d27b4d7692b76e26595b8faf", "e7c1bd9ece3f4eea8659d31ba2c8b07c", "f065dc07ccc2489cbb5096a8c964234d", "cd6448cf2c4245709ff64b0429629e33", "951cdd08ab2649bb98c3bd9ffcc4be3d", "3054078e9b9646c8b63ca7ac62e66d00", "148a714b30e847a697b23dee916b9540", "f48bbe67c4a34a86934f6124c97bfffd", "ac850b8d5b944fad9ffac61910f9c39b", "1909ba5e1a2d4fd5bd5c7d5d7786f9b4", "9b4bf3e417ef4d24b199adf2a94e87b0", "5c2aee872d0e4cfdae4d566c9e090aac", "d00c2c7e53d049529b849641971ee35a", "f9594d2697b24eacbdcc12c9eaffd54e", "2c221917f41248e8809fbd87d2cc610b", "165633fe87ae43178cf52ff4593ed152", "a5a33063265443e9bd1aabf6bb690a4a", "3b010fdfe6c5445eaec99ce8bbc4d790", "a15584ff2ef64fab8c00d9d2a5c4c5bf", "a94ff19be63a4493b7dbbeeabe8546cd", "7060b79ca9544a529be5961defef7fa1", "80a46eda62ff4682a1d84986c9fd7963", "b7577ee6d2e84af4891a98d0d1c80fb3", "746110fde07c40998cff1ee297dade22", "4d82c096c3ba460a9abc940c857f0931", "d67b90a2681943569aa55f70dd25d005", "b682853c0c13496b90ee289055a244a3", "16d0b65031b74cb4a631b0a121d64329", "540204ea089b4c0d88d8d553ea7154b3", "41937c5e515c4b2f8c45b5fe7b93092b", "9e54b1bd187248eeb65e54a298d8cf3d", "c1b832140ff5453faa88569d7b8f8931", "7f85c7b7c5ca422ea5e015f71097d5ae", "14a36b3e1d7d4c488f8413d399e8d39c", "65b1943628c546b6bcb1695670e09b51", "944a3b4146934bac87e0150aa2325280", "9d25c3c16b784dcbb5a25fb41aa6cfde", "d5e59a4b74a34b5c9c2ffd45950a799e", "99b8f6c6863144f5bfd5d55e96f9fef7", "cf230ca388244f939312969956ed93da", "0d45d8ca7c4c4e2a80ef88c7de5b40e3", "78393cc3fbdb45a78784cccda45aa8e4" ] }, "id": "7MbpXGu-v4f1", "outputId": "5370c525-b56b-45b6-e577-f3eb9ea9e9be" }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "8e912129c09043b390effea94ec6bd51", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading 
readme: 0%| | 0.00/2.81k [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "630a07984a4044f99d5e7d4d8e1fe473", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading data files: 0%| | 0/2 [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "79951f91b806441986df0ffb40c08b7a", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading data: 0%| | 0.00/26.7M [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "c4b6b313c6244323bdadba539190b1ab", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading data: 0%| | 0.00/29.5M [00:00, ?B/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "1909ba5e1a2d4fd5bd5c7d5d7786f9b4", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Extracting data files: 0%| | 0/2 [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "7060b79ca9544a529be5961defef7fa1", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Generating english split: 0%| | 0/41815 [00:00, ? examples/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "c1b832140ff5453faa88569d7b8f8931", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Generating portuguese split: 0%| | 0/41815 [00:00, ? examples/s]" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", " | prompt | \n", "completion | \n", "
---|---|---|
0 | \n", "Why can camels survive for long without water? | \n", "Camels can survive for long periods without wa... | \n", "
1 | \n", "Why mobile is bad for human | \n", "Mobile phones as technological devices have be... | \n", "
2 | \n", "What is a polygon? | \n", "A polygon is a two-dimensional geometric figur... | \n", "
3 | \n", "How do I start running? | \n", "Starting a running routine can be a great way ... | \n", "
4 | \n", "Which episodes of season four of Game of Thron... | \n", "Michelle MacLaren directed two episodes in Sea... | \n", "
... | \n", "... | \n", "... | \n", "
41810 | \n", "What can I call you by? | \n", "You can call me Aira. How may I assist you today? | \n", "
41811 | \n", "What's your identification name? | \n", "As an artificial intelligence, I do not have a... | \n", "
41812 | \n", "What are you called? | \n", "Hello! I am called Aira. How can I assist you ... | \n", "
41813 | \n", "How do you introduce yourself? | \n", "Hello! I am Aira, your helpful, respectful, an... | \n", "
41814 | \n", "What should I say when I address you? | \n", "When addressing me, you can simply call me Air... | \n", "
41815 rows × 2 columns
\n", "