{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "Q-bj6K7Qv4ft" }, "source": [ "# Fine-Tuning a Generative Pretrained Transformer (`GPT`)\n", "\n", "1. Install required libraries." ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "SBWCrz5GfBXo", "outputId": "1e1ab31b-3b54-4c39-a872-fb39e4c5c8a4" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", "Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.30.2)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.12.0)\n", "Requirement already satisfied: huggingface-hub<1.0,>=0.14.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.15.1)\n", "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.22.4)\n", "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (23.1)\n", "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0)\n", "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2022.10.31)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.27.1)\n", "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.13.3)\n", "Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.3.1)\n", "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers) (4.65.0)\n", "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.14.1->transformers) (2023.4.0)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.14.1->transformers) (4.5.0)\n", "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (1.26.15)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2022.12.7)\n", "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.0.12)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.4)\n", "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n", "Requirement already satisfied: codecarbon in /usr/local/lib/python3.10/dist-packages (2.2.3)\n", "Requirement already satisfied: arrow in /usr/local/lib/python3.10/dist-packages (from codecarbon) (1.2.3)\n", "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from codecarbon) (1.5.3)\n", "Requirement already satisfied: pynvml in /usr/local/lib/python3.10/dist-packages (from codecarbon) (11.5.0)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from codecarbon) (2.27.1)\n", "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from codecarbon) (5.9.5)\n", "Requirement already satisfied: py-cpuinfo in /usr/local/lib/python3.10/dist-packages (from codecarbon) (9.0.0)\n", "Requirement already satisfied: fuzzywuzzy in /usr/local/lib/python3.10/dist-packages (from codecarbon) (0.18.0)\n", "Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from codecarbon) (8.1.3)\n", "Requirement already satisfied: python-dateutil>=2.7.0 in /usr/local/lib/python3.10/dist-packages (from arrow->codecarbon) (2.8.2)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->codecarbon) (2022.7.1)\n", "Requirement already satisfied: numpy>=1.21.0 in /usr/local/lib/python3.10/dist-packages (from pandas->codecarbon) (1.22.4)\n", "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->codecarbon) (1.26.15)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->codecarbon) (2022.12.7)\n", "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->codecarbon) (2.0.12)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->codecarbon) (3.4)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7.0->arrow->codecarbon) (1.16.0)\n" ] } ], "source": [ "!pip install transformers\n", "!pip install codecarbon" ] }, { "cell_type": "markdown", "metadata": { "id": "y5XnfvSH7w4z" }, "source": [ "2. Load the data from the hub." ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 467 }, "id": "7MbpXGu-v4f1", "outputId": "3d8858dc-6ea1-4871-cb04-44dcecce51a6" }, "outputs": [ { "data": { "text/html": [ "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
promptcompletion
0Which is a species of fish? Tope or RopeTope
1Why can camels survive for long without water?Camels use the fat in their humps to keep them...
2Alice's parents have three daughters: Amy, Jes...The name of the third daughter is Alice
3Who gave the UN the land in NY to build their HQJohn D Rockerfeller
4Why mobile is bad for humanWe are always engaged one phone which is not g...
.........
53129How do computers communicate and network with ...Computers communicate and network with each ot...
53130How are websites different from web applications?Websites and web applications are similar in t...
53131What is open-source software and its benefits?Open-source software is software that is made ...
53132What is a cookie and how is it used in web bro...A cookie is a small piece of data that a websi...
53133What is cloud storage and its advantages for d...Cloud storage is a service that allows you to ...
\n", "

53134 rows × 2 columns

\n", "
\n", " \n", " \n", " \n", "\n", " \n", "
\n", "
\n", " " ], "text/plain": [ " prompt \\\n", "0 Which is a species of fish? Tope or Rope \n", "1 Why can camels survive for long without water? \n", "2 Alice's parents have three daughters: Amy, Jes... \n", "3 Who gave the UN the land in NY to build their HQ \n", "4 Why mobile is bad for human \n", "... ... \n", "53129 How do computers communicate and network with ... \n", "53130 How are websites different from web applications? \n", "53131 What is open-source software and its benefits? \n", "53132 What is a cookie and how is it used in web bro... \n", "53133 What is cloud storage and its advantages for d... \n", "\n", " completion \n", "0 Tope \n", "1 Camels use the fat in their humps to keep them... \n", "2 The name of the third daughter is Alice \n", "3 John D Rockerfeller \n", "4 We are always engaged one phone which is not g... \n", "... ... \n", "53129 Computers communicate and network with each ot... \n", "53130 Websites and web applications are similar in t... \n", "53131 Open-source software is software that is made ... \n", "53132 A cookie is a small piece of data that a websi... \n", "53133 Cloud storage is a service that allows you to ... \n", "\n", "[53134 rows x 2 columns]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import pandas as pd\n", "from datasets import load_dataset\n", "\n", "dataset = load_dataset(\"nicholasKluge/fine-tuning-instruct-aira\", split=\"aira_instruct_english\")\n", "\n", "df = dataset.to_pandas()\n", "\n", "display(df)" ] }, { "cell_type": "markdown", "metadata": { "id": "fEaDTvdOe8rr" }, "source": [ "3. Load `GPT2Tokenizer` and add the chosen special tokens (`'<|startoftext|>', '<|endoftext|>','<|pad|>'`)\n", "4. Create demonstrations by prepending the special tokens.\n", "5. Calculate the maximum length (in tokens) that the demonstrations have (the dataset was constructed, for efficiency and fast training, to be below the 300-token range)." ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 241, "referenced_widgets": [ "2231293622594eceb80d6481799ddcad", "c1135b0eb85044ab8e47e772e0c71f3c", "41b5bf9be3394e728e3f3699026ce885", "96552a3dbfa94bb4818b054b2675c6b8", "13c74818a5b949438589a0c73007e5a6", "6bab36d7ee934127a80a53ab87e0e508", "69e756a204214d689240aef2b9986118", "b27100866f8b4c08a7931f4d3d84f2b3", "47472c3a381f4eca9af9d03df1718065", "9cdd14da170b4b40b029b0150a40afc7", "4cf2d43d7ae24582ba035d9770f8e860", "6539b916daa747adab05dcff94c2cd3c", "433e467610674781b011918357e68baa", "57a05e39d49a4b4b853381676bd7ec9d", "0659624c4b9a4ce8b396b8d45814c580", "7ebf18e21686429391b47866e46f8c6d", "66e3178dae0d4b78ac3e00374ebd8e6b", "47449e8feed84a5cb428b2ac0006a053", "69d7726ac4804227ab1b1c026ca29c2b", "ba9429a433b5479eae6e188f1c5b864f", "ab4d80ebe66546ec92877f3288729bcd", "e1760e7d52464351ba391e002132a0bf", "e2ca1209ffe94778a38a383641b09cb9", "30ce9a42524446edbf99a1147377ae8e", "c119c152c0f3485daebb7cbc5992f13a", "75ef15d0ffba4344bc07a22690a99f13", "601e6dedd4a64ddcbeb5ead5b20c70aa", "5b053f2aac4e42e09c446691cbe67416", "62ebb5cbcbe94e0bb416896ec8493b5c", "96de7beae5b94e6a92f9f52b9d824035", "e521d59d193b42a28f29b3ad77c12c96", "acbc045dd45c41c484e59304a66336f4", "b8b4e3e26208445e91aa15ee3ea6e6b0" ] }, "id": "hfu84fWIv4f9", "outputId": "4c471e66-3497-433d-bc65-2287608a6f32" }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "2231293622594eceb80d6481799ddcad", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading (…)olve/main/vocab.json: 0%| | 0.00/1.04M [00:00',\n", " eos_token='<|endoftext|>',\n", " pad_token='<|pad|>')\n", "\n", "df['demonstrations'] = tokenizer.bos_token + df['prompt'] + tokenizer.eos_token + df['completion'] + tokenizer.eos_token\n", "\n", "df['length'] = df['demonstrations'].apply(lambda x: len(tokenizer.encode(x)))\n", "\n", "print(\"Total number of demonstrations: \", len(df))\n", "print(f\"The longest demonstration is {df['length'].max()} tokens long.\")" ] }, { "cell_type": "markdown", "metadata": { "id": "wkMO17K5e8rs" }, "source": [ "6. Create the Dataset class." ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "id": "WlbAfMQ4v4gA" }, "outputs": [], "source": [ "import torch\n", "from torch.utils.data import Dataset\n", "\n", "max_length = 300\n", "\n", "class DemoDataset(Dataset):\n", "\n", " def __init__(self, demonstrations, tokenizer, gpt2_type=\"gpt2\", max_length=max_length):\n", "\n", " self.tokenizer = tokenizer\n", " self.input_ids = []\n", " self.attn_masks = []\n", "\n", " for demo in demonstrations:\n", "\n", " encodings_dict = tokenizer(demo,\n", " truncation=True,\n", " max_length=max_length,\n", " padding=\"max_length\")\n", "\n", " self.input_ids.append(torch.tensor(encodings_dict['input_ids']))\n", " self.attn_masks.append(torch.tensor(encodings_dict['attention_mask']))\n", "\n", " def __len__(self):\n", " return len(self.input_ids)\n", "\n", " def __getitem__(self, idx):\n", " return self.input_ids[idx], self.attn_masks[idx]" ] }, { "cell_type": "markdown", "metadata": { "id": "711Krm6Te8rt" }, "source": [ "7. Split the data into training and validation splits." ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "-IOfa2PEv4gD", "outputId": "ca19c34e-79fe-43ef-f0ad-b4f06e2d8f5f" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Number of training samples: 47,820\n", "Number of validation samples: 5,314\n" ] } ], "source": [ "from torch.utils.data import random_split\n", "\n", "dataset = DemoDataset(df.demonstrations.to_list(), tokenizer, max_length=max_length)\n", "\n", "train_size = int(0.9 * len(dataset))\n", "val_size = len(dataset) - train_size\n", "\n", "train_dataset, val_dataset = random_split(dataset, [train_size, val_size])\n", "\n", "print('Number of training samples: {:,}'.format(train_size))\n", "print('Number of validation samples: {:,}'.format(val_size))" ] }, { "cell_type": "markdown", "metadata": { "id": "vSqKuRjIe8ru" }, "source": [ "8. Create the `DataLoaders` and specify the `batch_size`." ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "id": "cUkCNV-6v4gG" }, "outputs": [], "source": [ "from torch.utils.data import DataLoader, RandomSampler, SequentialSampler\n", "\n", "train_dataloader = DataLoader(\n", " train_dataset,\n", " sampler=RandomSampler(train_dataset),\n", " batch_size=8 # 32, 20, 8, 8\n", " )\n", "\n", "# validation data loader doesn't need randomization\n", "validation_dataloader=DataLoader(\n", " val_dataset,\n", " sampler=SequentialSampler(val_dataset),\n", " batch_size=8\n", " )" ] }, { "cell_type": "markdown", "metadata": { "id": "0vxvcTIHe8rv" }, "source": [ "9. Load the base model (`GPT2LMHeadModel`)." ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 550, "referenced_widgets": [ "0fc39225b0694840b7f421f725c135f2", "4886d255282b4eefa37e7a1db102a132", "7a37f293f6884093ab588c9ded32db07", "bd0f1d0070294b6eaebba425e9224150", "7cccbb84d04b4aaaa09309bb669f349d", "2cdd431ccdb64e409229913cc43e2544", "ba5408f14ed54e75a9724d39ac67d5c2", "10d2880b45814382951aa1015f4f1f5f", "3657a339af424c51ac7534f91fcb5893", "1bb5acd98f244616905e0f5cb8752a8a", "b023d77e9b854acd94ac0a91feb2de83", "b2d0b131237f49e684451afe60b57593", "8691d4c5d8d54e2494d2bf149feb9980", "7ddef1b949f04c81b6e26df3ce68cbc6", "e125c896502c4ad48b4bfbdbe3dc7383", "1e2b0f9db8654f85938565d9f694030d", "129c206371304edd9eaf34a5b56a00b8", "2d1b573ef7a242c295ed42d790a71f1e", "89f8e93f887d43ee933fa3b44a44fd72", "e58aaf5a1b9a4a2f85fa2546ee88f24e", "3181ccb3dde340c1b5c67ed12076c897", "4a22e5e3eb354b4cb63bf0aeb9105efa" ] }, "id": "Rmg-5YJqv4gH", "outputId": "1f8aa617-07b6-4774-d024-cd50d8148f5b" }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "0fc39225b0694840b7f421f725c135f2", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading model.safetensors: 0%| | 0.00/3.25G [00:00 model.config.n_layer - UNFREEZE_LAST_N:\n", " for parameter in m.parameters():\n", " parameter.requires_grad = True\n", "\n", " for parameter in model.transformer.ln_f.parameters():\n", " parameter.requires_grad = True\n", "\n", " for parameter in model.lm_head.parameters():\n", " parameter.requires_grad = True\n", "\n", "num_frozen_layers = sum(1 for parameter in model.parameters() if not parameter.requires_grad)\n", "num_trainable_layers = sum(1 for parameter in model.parameters() if parameter.requires_grad)\n", "\n", "print(\"Number of frozen layers:\", num_frozen_layers)\n", "print(\"Number of trainable layers:\", num_trainable_layers)" ] }, { "cell_type": "markdown", "metadata": { "id": "GJ1kH3uSe8rw" }, "source": [ "11. Set the training hyperparameters." ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "id": "qlbLg6tqv4gI" }, "outputs": [], "source": [ "from transformers import get_linear_schedule_with_warmup\n", "\n", "# training rounds\n", "epochs = 2 # 5, 3, 2, 1\n", "\n", "# steps at the start of training that are ignored\n", "warmup_steps = 1e2\n", "\n", "# sample the model to test the output\n", "sample_every = 400\n", "\n", "optimizer = torch.optim.AdamW(model.parameters(), lr = 5e-4, eps = 1e-8)\n", "\n", "# total steps = number of batches * number of epochs\n", "total_steps = len(train_dataloader) * epochs\n", "\n", "# create the learning rate scheduler\n", "scheduler = get_linear_schedule_with_warmup(optimizer,\n", " num_warmup_steps = warmup_steps,\n", " num_training_steps = total_steps)" ] }, { "cell_type": "markdown", "metadata": { "id": "nz1a9lxXe8rw" }, "source": [ "12. Training/Validation loop. Track the carbon emissions of your work by using `codecarbon`. 🌱" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "_X_m8XOtv4gR", "outputId": "998c5fe2-29d0-411c-a4da-67dffe9d9210" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Beginning epoch 1 of 2\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 7%|▋ | 400/5978 [04:38<1:04:12, 1.45it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Batch 400 of 5978. Loss:0.6028859615325928.\n", "\n", "\n", "Example output: What is the most secure way to store financial information.The most secure way to store financial information is to have a financial agency that is able to provide secure storage, including cold storage, with a combination of various cryptographic principles.\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 13%|█▎ | 800/5978 [09:17<59:35, 1.45it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Batch 800 of 5978. Loss:0.6626878380775452.\n", "\n", "\n", "Example output: Can you give me advice on setting up a business.One of the best ways to set up a business is to create an account with a reputable financial institution. Start by researching the account you need, and you should be able to find a reliable and trustworthy account to start with. Additionally, look for services that are offered specifically for businesses or professionals and make sure to compare rates. This can help you find a comfortable level of cost for each service. Finally, make sure you have a plan for growth. Research the business and find out the resources and marketing campaigns that can help you reach your desired audience.\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 20%|██ | 1200/5978 [13:58<54:59, 1.45it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Batch 1200 of 5978. Loss:0.655192494392395.\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r 20%|██ | 1201/5978 [14:01<1:39:26, 1.25s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Example output: Please help me find an interesting but classic book to read for my commute this week.One interesting book to read for your commute this week is \"The Hitchhiker's Guide to the Galaxy\" by Douglas Adams. It is a classic fantasy adventure book that follows the protagonist's journey to save the universe.\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 27%|██▋ | 1600/5978 [18:37<50:23, 1.45it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Batch 1600 of 5978. Loss:0.7218917012214661.\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r 27%|██▋ | 1601/5978 [18:38<1:11:20, 1.02it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Example output: Name five different cheeses.Tequila, Chorizo, Mashed Potatoes, Burgundy Cheese, and Mashed Potatoes\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 33%|███▎ | 2000/5978 [23:14<45:47, 1.45it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Batch 2000 of 5978. Loss:0.4903753697872162.\n", "\n", "\n", "Example output: What is algebra and who invented it?Algebra and the foundations of mathematics are incredibly complex and require a lot of practice in order to learn. The concepts of algebra and the algebraic equations are based on equations and sequences of equations, not on purely numerical information. It is important to differentiate between algorithms and algorithms' algorithms, as Algebra and the Algebraic equations work very effectively in situations where these equations are being used to solve complex problems. For instance, using the equation A=1 and applying it to any real number, such as 1, can solve any equation of that order. The equations of motion are typically solvable by algebraic equations and have some applications in machine learning, where it is possible to identify patterns in data that are difficult to solve with numerical information.\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 40%|████ | 2400/5978 [27:56<41:11, 1.45it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Batch 2400 of 5978. Loss:0.4223511219024658.\n", "\n", "\n", "Example output: How can I improve my learning habits, so that the material I'm studying is stored in my memory more effectively.One of the most effective ways to improve your learning habits is to set small achievable goals. Break down your learning into smaller chunks and focus on mastering the material you are learning. This will help to stay motivated to study and make progress. Additionally, make sure to practice with native materials to help develop your understanding of the language and familiarize yourself with what you are learning. Finally, look for resources like books, podcasts, and websites that are designed to be visual and engaging, allowing you to experience the language from different angles and perspectives.\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 47%|████▋ | 2800/5978 [32:37<36:35, 1.45it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Batch 2800 of 5978. Loss:0.7402357459068298.\n", "\n", "\n", "Example output: What is neoliberalism and what are its key tenets?Foliberalism is an ideology that emphasizes the importance of individual freedom and autonomy, while recognizing the importance of developing policies and technologies that guarantee social, economic, and political stability. It is often associated with economic deregulation, deregulation of industries and industries on a large scale, deregulation of industries within governments, deregulation of industries within state and local governments, and deregulation of industries within global companies. This ideology involves preserving individual freedom and autonomy, while encouraging technological advancements and exploring the world around us. It is often associated with the deregulation of the economy and the deregulation of the economy in general, as well as deregulation of government services and the deregulation of government and industries. Neoliberalism also involves the deregulation of political and social systems, including deregulation of democratic reforms, deregulation of laws, and deregulation of industries to ensure a strong welfare state. It is often associated with a free-market perspective, and the deregulation of individual and collective freedom and autonomy.\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 54%|█████▎ | 3200/5978 [37:21<31:58, 1.45it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Batch 3200 of 5978. Loss:0.5701470971107483.\n", "\n", "\n", "Example output: Write a brief paragraph of the benefits of attending Arizona State UniversityArizona State University is one of the oldest universities in the United States, dating back to 1867. It is a private institution with over 200 degree programs. It is also one of the best universities in the United States for those with a degree or master's degree in mathematics, economics, research science, public health, or education. Arizona State University offers its students a wide range of program offerings, including the renowned Stanford University University, University of Southern California, Arizona State University, Arizona State University, Arizona State University. Arizona State University offers its students a wide range of program offerings, including the renowned Stanford University University, University of Southern California, Arizona State University, Arizona State University, Arizona State University. Phoenix and Arizona are both located in Arizona and offer a range of student activities such as Arizona State University School of Business, Arizona State University School of Law, Arizona State University School of State University, and Arizona State University College of Business\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 60%|██████ | 3600/5978 [42:05<27:21, 1.45it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Batch 3600 of 5978. Loss:0.7429184913635254.\n", "\n", "\n", "Example output: How do I organize my home office.To organize your home office, you will need to find a storage space, decide what items you need and determine what you will get in exchange. Then, decide what features you want the office to have such as a filing system, tools, and materials. After that, you can decide if you want to keep everything in the closet or invest in some organizers. Once you have your space organized, you can start by sorting the items by type. This will help make use of the space better, such as organizing small items like the necessary items like coffee and paper. Finally, you can set realistic goals for the items you want and determine how much you need each one.\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 67%|██████▋ | 4000/5978 [46:47<22:45, 1.45it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Batch 4000 of 5978. Loss:0.7125013470649719.\n", "\n", "\n", "Example output: Is violence good sometimes?As a manager, what is good for you is not always good for you, and what is bad sometimes is not necessarily bad in the long run. This could apply to conflicts or disputes, relationships with others or time in the workplace. It's important to acknowledge that it's okay to be willing to take some calculated risk or involve yourself in an argument to reach a resolution or finding a mutually beneficial solution. It's okay to be willing to try different strategies and to be open to finding ways to avoid conflict. It's also important to try to prioritize finding a productive solution and finding ways to communicate better and compromise when needed.\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 74%|███████▎ | 4400/5978 [51:29<18:09, 1.45it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Batch 4400 of 5978. Loss:0.9460142850875854.\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r 74%|███████▎ | 4401/5978 [51:31<27:22, 1.04s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Example output: Classify each item as being at a playground or a gym: slide, swing, dumbbell, squat rack, seesawSlide: playground\n", "Swing: Gym\n", "Dumbbell: Gym\n", "Squat Rack: Gym\n", "Seesaw: Gym\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 80%|████████ | 4800/5978 [56:06<13:33, 1.45it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Batch 4800 of 5978. Loss:0.8724031448364258.\n", "\n", "\n", "Example output: What are some useful strategies for solving a Rubik's cube.Crackers \n", "2. Turn the cube clockwise by moving one of the bottom pieces clockwise. \n", "3. Turn the cube clockwise by moving one of the bottom pieces clockwise. \n", "4. Turn the cube clockwise by moving one of the bottom pieces clockwise. \n", "5. Turn the cube clockwise by moving one of the bottom pieces clockwise. \n", "6. Turn the cube clockwise by moving one of the bottom pieces clockwise. \n", "7. Turn the cube clockwise by moving one of the bottom pieces clockwise. \n", "8. Turn the cube clockwise by moving one of the bottom pieces clockwise. \n", "9. Turn the cube clockwise by moving one of the bottom pieces clockwise. \n", "10 Turn the cube clockwise by moving one of the bottom pieces clockwise.\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 87%|████████▋ | 5200/5978 [1:00:50<08:57, 1.45it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Batch 5200 of 5978. Loss:0.5923346281051636.\n", "\n", "\n", "Example output: How much should I save each month to purchase a house in five years.The amount you should save each month to purchase a house in five years will vary based on the size of your home, as well as the condition of the property and the financial commitment you need to commit. However, you may want to set a goal that limits the amount of time you need to save each month to purchase a house. Additionally, make sure to factor in any unexpected expenses you may incur, such as insurance or maintenance.\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 94%|█████████▎| 5600/5978 [1:05:30<04:20, 1.45it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Batch 5600 of 5978. Loss:0.5626195669174194.\n", "\n", "\n", "Example output: I heard that meditation can be beneficial, do you know of any tips.Yes, regular meditation can be beneficial. Studies have found that regular physical activity can help keep the body moving and stay in shape. Regular physical activity can also help maintain a healthy diet and may also support other health benefits. Additionally, learning relaxation techniques such as deep breathing, progressive muscle relaxation, and progressive muscle relaxation (PMA) can help keep the body relaxed and focused on a task. Finally, talking to a friend or counselor can help create a supportive environment for regular meditation or other stress management techniques.\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 5978/5978 [1:09:55<00:00, 1.42it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Average Training Loss: 0.6872660141050636.\n", "\n", "\n", "Validation loss: 0.6161282828427795.\n", "\n", "\n", "Beginning epoch 2 of 2\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 7%|▋ | 400/5978 [04:36<1:04:12, 1.45it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Batch 400 of 5978. Loss:0.3264784812927246.\n", "\n", "\n", "Example output: What are the signs of an ingrown toenail.Common signs of an toenail toenail include pain when urinating, limited range of motion when urinating, and an urge to urinate when urinating. Other signs can include nausea, constipation, and tiredness.\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 13%|█▎ | 800/5978 [09:14<59:36, 1.45it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Batch 800 of 5978. Loss:0.560775876045227.\n", "\n", "\n", "Example output: What is the best way to plan a vacation on a budget.The best way to plan a vacation on a budget is to compare prices across different sites, look for deals and discounts, and plan to have enough cash to make the most of your vacation. Additionally, consider budgeting for food, entertainment, and travel, and consider staying at a cheaper or better accommodation. Finally, if you need help staying within your budget, consider visiting a local local community center or community center with affordable or free activities or entertainment.\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 20%|██ | 1200/5978 [13:54<54:58, 1.45it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Batch 1200 of 5978. Loss:0.41560453176498413.\n", "\n", "\n", "Example output: What is the easiest way to learn a language by myself.The easiest way to learn a language by yourself is to find a language learning program that offers tutorials and activities to help you learn the language. These programs usually provide comprehensive lessons, with examples and exercises to help you understand the language. Additionally, many provide virtual courses, which provide content and exercises in a more streamlined way than a tutorial.\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 27%|██▋ | 1600/5978 [18:33<50:23, 1.45it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Batch 1600 of 5978. Loss:0.35295894742012024.\n", "\n", "\n", "Example output: What are some tips and tricks for mastering a computer game.Make sure to practice regularly - even if it’s only for a few minutes each day. \n", "2 Break down the game into manageable chunks and set achievable goals. \n", "3 Participate in team discussions and work together to solve problems.\n", "4 Identify any issues you may have and delegate the tasks to the team.\n", "5 Take breaks, and keep yourself motivated.\n", "6 Take breaks from work to reward yourself.\n", "7 Find a comfortable, quiet space and focus on learning the game.\n", "8 Test yourself often to make sure you are having fun and enjoying the game.\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 33%|███▎ | 2000/5978 [23:15<45:47, 1.45it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Batch 2000 of 5978. Loss:0.5085460543632507.\n", "\n", "\n", "Example output: What activities can I do with my family to stay active.Some activities you can do with your family to stay active include playing active video games, doing yoga, taking a nature walk, doing a workout, going for a hike, or biking. You can also watch a movie or read a book together, or listen to music.\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 40%|████ | 2400/5978 [27:53<41:10, 1.45it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Batch 2400 of 5978. Loss:0.458877831697464.\n", "\n", "\n", "Example output: Which country has the best soccer team in the world.The answer to this will depend on who you ask. Some people may point to countries like Germany, Italy, or Spain as the best soccer teams to play in. Each has its own strengths and weaknesses and will create its own answer. For example, some people may point to Argentina as the best soccer team to play in because of their technical abilities, while others may point to Brazil as the best soccer team to play in based on their passion and passion for their sport. Ultimately, it really comes down to what you are looking for when evaluating a soccer team and who you are interested in playing with.\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 47%|████▋ | 2800/5978 [32:35<36:34, 1.45it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Batch 2800 of 5978. Loss:0.8314157724380493.\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r 47%|████▋ | 2801/5978 [32:36<48:37, 1.09it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Example output: What are some words that rhyme with orangeorange, orange, orange, orange, orange, orange, orange, orange, orange\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 54%|█████▎ | 3200/5978 [37:12<31:58, 1.45it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Batch 3200 of 5978. Loss:0.5375759601593018.\n", "\n", "\n", "Example output: What are some beginner tips for learning guitar.Start with simple songs such as 'Happy on Their Own' by Phish.\n", "2. Learn chords and scales on guitar.\n", "3. Practice and refine your technique.\n", "4. Learn improvisation and practice scales and chords together.\n", "5. Learn to play along with songs.\n", "6. Learn to play along with songs written by other musicians.\n", "7. Listen to recordings of professional guitarists.\n", "8. Use online lessons and tutorials to learn the basics.\n", "9. Play and enjoy playing guitar.\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 60%|██████ | 3600/5978 [41:52<27:21, 1.45it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Batch 3600 of 5978. Loss:0.5550445914268494.\n", "\n", "\n", "Example output: I'm applying for a new job and need to know what to expect in an interview in my industry.In an interview for your industry, you should expect the interviewer to ask a variety of thoughtful questions to demonstrate that you understand and demonstrate your enthusiasm for the role. Additionally, it is important to be honest, clear, and respectful throughout the interview.\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 67%|██████▋ | 4000/5978 [46:31<22:45, 1.45it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Batch 4000 of 5978. Loss:0.33791348338127136.\n", "\n", "\n", "Example output: What are the best YouTube channels for cooking tutorials.The best YouTube channels for cooking tutorials are those that provide step-by-step instructions for simple cooking projects. They usually focus on food basics like boiling, roasting, or grilling, and usually have tutorials on the basics of basic recipes. YouTube also has channels that offer cooking tutorials in a variety of cuisines, from traditional Chinese to West African. Additionally, YouTube has a wealth of cooking instructional materials ranging from video tutorials to infographics and much more. Good luck.\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 74%|███████▎ | 4400/5978 [51:11<18:09, 1.45it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Batch 4400 of 5978. Loss:0.47749656438827515.\n", "\n", "\n", "Example output: Can you provide a recipe for making tomato soup.Ingredients:\n", "- 3 cups diced onion\n", "- 1 onion, diced\n", "- 2 cloves garlic, minced\n", "- 1 pound beef stew meat\n", "- 1 teaspoon Worcestershire sauce\n", "- 1 teaspoon dried basil\n", "- 1 teaspoon dried thyme\n", "- 1 teaspoon ground coriander\n", "- 1/2 teaspoon paprika\n", "- 1/2 teaspoon cumin\n", "- 1/4 teaspoon garlic powder\n", "- Salt and pepper, to taste\n", "\n", "Instructions:\n", "1. Heat a large pot over medium heat.\n", "2. Add the onion, garlic, and beef. Cook until the beef is softened and the onion is fragrant, about 15 minutes.\n", "3. Add the Worcestershire sauce and basil, and cook for 1 minute more.\n", "4. Add the thyme, coriander, paprika, and cumin and cook for 1 minute more.\n", "5. Stir in the soup broth\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 80%|████████ | 4800/5978 [55:55<13:33, 1.45it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Batch 4800 of 5978. Loss:0.4787990152835846.\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r 80%|████████ | 4801/5978 [55:57<23:44, 1.21s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Example output: Can you explain updateless decision theory?UDT is a decision theory that aims to solve an issue present in decision-making theories, which is the need to consider the decision maker as a component of the environment in which the decisions are made.\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 87%|████████▋ | 5200/5978 [1:00:33<08:57, 1.45it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Batch 5200 of 5978. Loss:0.311775803565979.\n", "\n", "\n", "Example output: I'm having trouble with algebra, can you help me solve this equation.I'm sorry, I'm not able to help you with algebra equations. First, let's look at some examples. Let's consider an equation with solutions to it. What is the equation y=x+2 and what is the second equation y=2+x? First, we need to know what is the x-coordinates of the two equations. Then we need to know the solutions for the first equation. Once we have all of this information, we can use algebra to solve the equation y=2+x.\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 94%|█████████▎| 5600/5978 [1:05:13<04:20, 1.45it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Batch 5600 of 5978. Loss:0.44621706008911133.\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\r 94%|█████████▎| 5601/5978 [1:05:15<06:27, 1.03s/it]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Example output: Can you define data augmentation?Data Augmentation is a method that generates \"fake samples\" by modifying the training data to create synthetic instances of the true data.\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 5978/5978 [1:09:35<00:00, 1.43it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Average Training Loss: 0.46858145299066306.\n", "\n", "\n", "Validation loss: 0.5825508199688187.\n", "\n", "Training complete!\n" ] }, { "data": { "text/plain": [ "('/content/drive/MyDrive/Colab Notebooks/Aira-774M/tokenizer_config.json',\n", " '/content/drive/MyDrive/Colab Notebooks/Aira-774M/special_tokens_map.json',\n", " '/content/drive/MyDrive/Colab Notebooks/Aira-774M/vocab.json',\n", " '/content/drive/MyDrive/Colab Notebooks/Aira-774M/merges.txt',\n", " '/content/drive/MyDrive/Colab Notebooks/Aira-774M/added_tokens.json')" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from codecarbon import EmissionsTracker\n", "import tqdm\n", "\n", "output_dir = f'./Aira-{model_size}'\n", "\n", "tracker = EmissionsTracker(\n", " project_name=\"Aira_emissions\",\n", " log_level=\"critical\",\n", " output_dir=output_dir,\n", " output_file=\"Aira_emissions.csv\",\n", ")\n", "\n", "training_stats = []\n", "\n", "tracker.start()\n", "for epoch_i in range(0, epochs):\n", "\n", " print(f'\\nBeginning epoch {epoch_i + 1} of {epochs}\\n')\n", "\n", " total_train_loss = 0\n", "\n", " model.train()\n", "\n", " for step, batch in enumerate(tqdm.tqdm(train_dataloader)):\n", "\n", " b_input_ids = batch[0].to(device)\n", " b_labels = batch[0].to(device)\n", " b_masks = batch[1].to(device)\n", "\n", " model.zero_grad()\n", "\n", " outputs = model(b_input_ids,\n", " labels=b_labels,\n", " attention_mask = b_masks,\n", " token_type_ids=None)\n", "\n", " loss = outputs[0]\n", "\n", " batch_loss = loss.item()\n", " total_train_loss += batch_loss\n", "\n", " if step % sample_every == 0 and not step == 0:\n", "\n", " print(f'\\nBatch {step} of {len(train_dataloader)}. Loss:{batch_loss}.\\n')\n", "\n", " model.eval()\n", "\n", " inputs = tokenizer(tokenizer.bos_token + df.prompt.sample().iloc[0] + tokenizer.eos_token, return_tensors=\"pt\").to(device)\n", "\n", " sample_outputs = model.generate(**inputs,\n", " bos_token_id=tokenizer.bos_token_id,\n", " pad_token_id=tokenizer.pad_token_id,\n", " eos_token_id=tokenizer.eos_token_id,\n", " do_sample=True,\n", " top_k=50,\n", " max_length = 200,\n", " top_p=0.95,\n", " num_return_sequences=1)\n", "\n", " for i, sample_output in enumerate(sample_outputs):\n", " print(f'\\nExample output: {tokenizer.decode(sample_output, skip_special_tokens=True)}\\n')\n", "\n", " model.train()\n", "\n", " loss.backward()\n", "\n", " optimizer.step()\n", "\n", " scheduler.step()\n", "\n", " avg_train_loss = total_train_loss / len(train_dataloader)\n", "\n", "\n", " print(f'\\nAverage Training Loss: {avg_train_loss}.\\n')\n", "\n", " model.eval()\n", "\n", " total_eval_loss = 0\n", " nb_eval_steps = 0\n", "\n", " for batch in validation_dataloader:\n", "\n", " b_input_ids = batch[0].to(device)\n", " b_labels = batch[0].to(device)\n", " b_masks = batch[1].to(device)\n", "\n", " with torch.no_grad():\n", "\n", " outputs = model(b_input_ids,\n", " attention_mask = b_masks,\n", " labels=b_labels)\n", "\n", " loss = outputs[0]\n", "\n", " batch_loss = loss.item()\n", " total_eval_loss += batch_loss\n", "\n", " avg_val_loss = total_eval_loss / len(validation_dataloader)\n", "\n", "\n", " print(f'\\nValidation loss: {avg_val_loss}.\\n')\n", "\n", " training_stats.append(\n", " {\n", " 'epoch': epoch_i + 1,\n", " 'Training Loss': avg_train_loss,\n", " 'Valid. Loss': avg_val_loss,\n", " }\n", " )\n", "\n", "tracker.stop()\n", "print(\"Training complete!\")\n", "\n", "df_stats = pd.DataFrame(data=training_stats)\n", "df_stats = df_stats.set_index('epoch')\n", "df_stats.to_parquet(f\"{output_dir}/training_stats.parquet\", compression=\"gzip\")\n", "\n", "model_to_save = model.module if hasattr(model, 'module') else model\n", "model_to_save.save_pretrained(output_dir)\n", "tokenizer.save_pretrained(output_dir)" ] }, { "cell_type": "markdown", "metadata": { "id": "h48iOsqie8rx" }, "source": [ "13. Check the training stats and plot the learning curves." ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 439 }, "id": "J1-hAY9Av4gT", "outputId": "d8909676-fba0-470f-80cd-22d29cf2ecb1" }, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import pandas as pd\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "\n", "df_stats = pd.read_parquet(f\"{output_dir}/training_stats.parquet\")\n", "\n", "# Use plot styling from seaborn.\n", "sns.set(style='darkgrid')\n", "\n", "# Increase the plot size and font size.\n", "sns.set(font_scale=1.5)\n", "plt.rcParams[\"figure.figsize\"] = (12,6)\n", "\n", "# Plot the learning curve.\n", "plt.plot(df_stats['Training Loss'], 'b-o', label=\"Training\")\n", "plt.plot(df_stats['Valid. Loss'], 'g-o', label=\"Validation\")\n", "\n", "# Label the plot.\n", "plt.title(\"Training & Validation Loss\")\n", "plt.xlabel(\"Epoch\")\n", "plt.ylabel(\"Loss\")\n", "plt.legend()\n", "plt.xticks([1, 2, 3, 4, 5])\n", "\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": { "id": "dXX4z7Wqe8ry" }, "source": [ "13. Load and test the model." ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "mpBJNtYuAY9D", "outputId": "76ec7e63-ee22-45c4-b690-60ac501fbbf4" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Enter your question: What is the capital of France?\n", "Question: 👤 What is the capital of France?\n", "\n", "Response 1: 🤖 The capital of France is Paris.\n", "Response 2: 🤖 The capital of France is Paris.\n" ] } ], "source": [ "from transformers import AutoModelForCausalLM, AutoTokenizer\n", "import torch\n", "\n", "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", "\n", "tokenizer = AutoTokenizer.from_pretrained(output_dir)\n", "aira = AutoModelForCausalLM.from_pretrained(output_dir)\n", "\n", "aira.eval()\n", "aira.to(device)\n", "\n", "question = input(\"Enter your question: \")\n", "\n", "inputs = tokenizer(tokenizer.bos_token + question + tokenizer.eos_token, return_tensors=\"pt\").to(device)\n", "\n", "responses = aira.generate(**inputs,\n", " bos_token_id=tokenizer.bos_token_id,\n", " pad_token_id=tokenizer.pad_token_id,\n", " eos_token_id=tokenizer.eos_token_id,\n", " do_sample=True,\n", " top_k=50,\n", " max_length=200,\n", " top_p=0.95,\n", " temperature=0.7,\n", " num_return_sequences=2)\n", "\n", "print(f\"Question: 👤 {question}\\n\")\n", "\n", "for i, response in enumerate(responses):\n", " # print only the response and remove the question\n", " print(f'Response {i+1}: 🤖 {tokenizer.decode(response, skip_special_tokens=True).replace(question, \"\")}')\n" ] }, { "cell_type": "markdown", "metadata": { "id": "cJXlHi2oe8rz" }, "source": [ "Done! 🤗" ] } ], "metadata": { "accelerator": "GPU", "colab": { "gpuType": "A100", "machine_shape": "hm", "provenance": [] }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.13" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "0659624c4b9a4ce8b396b8d45814c580": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_ab4d80ebe66546ec92877f3288729bcd", "placeholder": "​", "style": "IPY_MODEL_e1760e7d52464351ba391e002132a0bf", "value": " 456k/456k [00:00<00:00, 1.08MB/s]" } }, "0fc39225b0694840b7f421f725c135f2": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_4886d255282b4eefa37e7a1db102a132", "IPY_MODEL_7a37f293f6884093ab588c9ded32db07", "IPY_MODEL_bd0f1d0070294b6eaebba425e9224150" ], "layout": "IPY_MODEL_7cccbb84d04b4aaaa09309bb669f349d" } }, "10d2880b45814382951aa1015f4f1f5f": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "129c206371304edd9eaf34a5b56a00b8": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "13c74818a5b949438589a0c73007e5a6": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "1bb5acd98f244616905e0f5cb8752a8a": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "1e2b0f9db8654f85938565d9f694030d": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "2231293622594eceb80d6481799ddcad": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_c1135b0eb85044ab8e47e772e0c71f3c", "IPY_MODEL_41b5bf9be3394e728e3f3699026ce885", "IPY_MODEL_96552a3dbfa94bb4818b054b2675c6b8" ], "layout": "IPY_MODEL_13c74818a5b949438589a0c73007e5a6" } }, "2cdd431ccdb64e409229913cc43e2544": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "2d1b573ef7a242c295ed42d790a71f1e": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "30ce9a42524446edbf99a1147377ae8e": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_5b053f2aac4e42e09c446691cbe67416", "placeholder": "​", "style": "IPY_MODEL_62ebb5cbcbe94e0bb416896ec8493b5c", "value": "Downloading (…)lve/main/config.json: 100%" } }, "3181ccb3dde340c1b5c67ed12076c897": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "3657a339af424c51ac7534f91fcb5893": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "41b5bf9be3394e728e3f3699026ce885": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_b27100866f8b4c08a7931f4d3d84f2b3", "max": 1042301, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_47472c3a381f4eca9af9d03df1718065", "value": 1042301 } }, "433e467610674781b011918357e68baa": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_66e3178dae0d4b78ac3e00374ebd8e6b", "placeholder": "​", "style": "IPY_MODEL_47449e8feed84a5cb428b2ac0006a053", "value": "Downloading (…)olve/main/merges.txt: 100%" } }, "47449e8feed84a5cb428b2ac0006a053": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "47472c3a381f4eca9af9d03df1718065": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "4886d255282b4eefa37e7a1db102a132": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_2cdd431ccdb64e409229913cc43e2544", "placeholder": "​", "style": "IPY_MODEL_ba5408f14ed54e75a9724d39ac67d5c2", "value": "Downloading model.safetensors: 100%" } }, "4a22e5e3eb354b4cb63bf0aeb9105efa": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "4cf2d43d7ae24582ba035d9770f8e860": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "57a05e39d49a4b4b853381676bd7ec9d": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_69d7726ac4804227ab1b1c026ca29c2b", "max": 456318, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_ba9429a433b5479eae6e188f1c5b864f", "value": 456318 } }, "5b053f2aac4e42e09c446691cbe67416": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "601e6dedd4a64ddcbeb5ead5b20c70aa": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "62ebb5cbcbe94e0bb416896ec8493b5c": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "6539b916daa747adab05dcff94c2cd3c": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_433e467610674781b011918357e68baa", "IPY_MODEL_57a05e39d49a4b4b853381676bd7ec9d", "IPY_MODEL_0659624c4b9a4ce8b396b8d45814c580" ], "layout": "IPY_MODEL_7ebf18e21686429391b47866e46f8c6d" } }, "66e3178dae0d4b78ac3e00374ebd8e6b": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "69d7726ac4804227ab1b1c026ca29c2b": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "69e756a204214d689240aef2b9986118": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "6bab36d7ee934127a80a53ab87e0e508": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "75ef15d0ffba4344bc07a22690a99f13": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_acbc045dd45c41c484e59304a66336f4", "placeholder": "​", "style": "IPY_MODEL_b8b4e3e26208445e91aa15ee3ea6e6b0", "value": " 666/666 [00:00<00:00, 63.5kB/s]" } }, "7a37f293f6884093ab588c9ded32db07": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_10d2880b45814382951aa1015f4f1f5f", "max": 3247159078, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_3657a339af424c51ac7534f91fcb5893", "value": 3247159078 } }, "7cccbb84d04b4aaaa09309bb669f349d": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "7ddef1b949f04c81b6e26df3ce68cbc6": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_89f8e93f887d43ee933fa3b44a44fd72", "max": 124, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_e58aaf5a1b9a4a2f85fa2546ee88f24e", "value": 124 } }, "7ebf18e21686429391b47866e46f8c6d": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "8691d4c5d8d54e2494d2bf149feb9980": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_129c206371304edd9eaf34a5b56a00b8", "placeholder": "​", "style": "IPY_MODEL_2d1b573ef7a242c295ed42d790a71f1e", "value": "Downloading (…)neration_config.json: 100%" } }, "89f8e93f887d43ee933fa3b44a44fd72": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "96552a3dbfa94bb4818b054b2675c6b8": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_9cdd14da170b4b40b029b0150a40afc7", "placeholder": "​", "style": "IPY_MODEL_4cf2d43d7ae24582ba035d9770f8e860", "value": " 1.04M/1.04M [00:00<00:00, 1.66MB/s]" } }, "96de7beae5b94e6a92f9f52b9d824035": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "9cdd14da170b4b40b029b0150a40afc7": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "ab4d80ebe66546ec92877f3288729bcd": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "acbc045dd45c41c484e59304a66336f4": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "b023d77e9b854acd94ac0a91feb2de83": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "b27100866f8b4c08a7931f4d3d84f2b3": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "b2d0b131237f49e684451afe60b57593": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_8691d4c5d8d54e2494d2bf149feb9980", "IPY_MODEL_7ddef1b949f04c81b6e26df3ce68cbc6", "IPY_MODEL_e125c896502c4ad48b4bfbdbe3dc7383" ], "layout": "IPY_MODEL_1e2b0f9db8654f85938565d9f694030d" } }, "b8b4e3e26208445e91aa15ee3ea6e6b0": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "ba5408f14ed54e75a9724d39ac67d5c2": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "ba9429a433b5479eae6e188f1c5b864f": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "bd0f1d0070294b6eaebba425e9224150": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_1bb5acd98f244616905e0f5cb8752a8a", "placeholder": "​", "style": "IPY_MODEL_b023d77e9b854acd94ac0a91feb2de83", "value": " 3.25G/3.25G [00:07<00:00, 507MB/s]" } }, "c1135b0eb85044ab8e47e772e0c71f3c": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_6bab36d7ee934127a80a53ab87e0e508", "placeholder": "​", "style": "IPY_MODEL_69e756a204214d689240aef2b9986118", "value": "Downloading (…)olve/main/vocab.json: 100%" } }, "c119c152c0f3485daebb7cbc5992f13a": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_96de7beae5b94e6a92f9f52b9d824035", "max": 666, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_e521d59d193b42a28f29b3ad77c12c96", "value": 666 } }, "e125c896502c4ad48b4bfbdbe3dc7383": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_3181ccb3dde340c1b5c67ed12076c897", "placeholder": "​", "style": "IPY_MODEL_4a22e5e3eb354b4cb63bf0aeb9105efa", "value": " 124/124 [00:00<00:00, 10.2kB/s]" } }, "e1760e7d52464351ba391e002132a0bf": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "e2ca1209ffe94778a38a383641b09cb9": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_30ce9a42524446edbf99a1147377ae8e", "IPY_MODEL_c119c152c0f3485daebb7cbc5992f13a", "IPY_MODEL_75ef15d0ffba4344bc07a22690a99f13" ], "layout": "IPY_MODEL_601e6dedd4a64ddcbeb5ead5b20c70aa" } }, "e521d59d193b42a28f29b3ad77c12c96": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "e58aaf5a1b9a4a2f85fa2546ee88f24e": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } } } } }, "nbformat": 4, "nbformat_minor": 0 }