nicholasKluge committed on
Commit 8e775fb
Parent: d996c32

Upload AIRA_FineTuning.ipynb

Files changed (1):
  1. AIRA_FineTuning.ipynb +12 -12
AIRA_FineTuning.ipynb CHANGED
@@ -758,14 +758,14 @@
  "source": [
  "from transformers import GPT2Tokenizer\n",
  "\n",
- "model = \"pierreguillou/gpt2-small-portuguese\" # \"gpt2\", \"gpt2-medium\", \"gpt2-large\", \"gpt2-xl\", \"pierreguillou/gpt2-small-portuguese\",\n",
- "model_size = \"PT-124M\" # \"124M\", \"355M\", \"774M\", and \"1.5B\", \"PT-124M\",\n",
+ "model = \"pierreguillou/gpt2-small-portuguese\" \n",
+ "model_size = \"PT-124M\" \n",
  "\n",
  "tokenizer = GPT2Tokenizer.from_pretrained(model,\n",
- " bos_token='<|startofinstruction|>', # '<|startoftext|>'\n",
+ " bos_token='<|startofinstruction|>', \n",
  " sep_token = '<|endofinstruction|>',\n",
- " eos_token='<|endofcompletion|>', # '<|endoftext|>'\n",
- " pad_token='<|pad|>') # '<|pad|>'\n",
+ " eos_token='<|endofcompletion|>', \n",
+ " pad_token='<|pad|>') \n",
  "\n",
  "df['demonstrations'] = tokenizer.bos_token + df['prompt'] + tokenizer.sep_token + df['completion'] + tokenizer.eos_token\n",
  "\n",
@@ -830,7 +830,7 @@
  "id": "vSqKuRjIe8ru"
  },
  "source": [
- "8. Create the `DataLoaders` and specify the `batch_size`."
+ "7. Create the `DataLoaders` and specify the `batch_size`."
  ]
 },
 {
@@ -846,7 +846,7 @@
  "dataloader = DataLoader(\n",
  " dataset,\n",
  " sampler=RandomSampler(dataset),\n",
- " batch_size=24, # 32, 20, 8, 4\n",
+ " batch_size=24, \n",
  " )"
  ]
 },
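The two hunks above renumber the step and pin batch_size to 24, dropping the alternative values that were parked in comments. A self-contained sketch of the same DataLoader construction; the TensorDataset of random token ids is a stand-in for the notebook's tokenized demonstrations:

import torch
from torch.utils.data import DataLoader, RandomSampler, TensorDataset

# Stand-in dataset: 96 sequences of 128 dummy token ids.
dataset = TensorDataset(torch.randint(0, 50257, (96, 128)))

dataloader = DataLoader(
    dataset,
    sampler=RandomSampler(dataset),  # reshuffles the demonstrations every epoch
    batch_size=24,                   # the value kept by this commit
)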
@@ -856,7 +856,7 @@
  "id": "0vxvcTIHe8rv"
  },
  "source": [
- "9. Load the base model (`GPT2LMHeadModel`)."
+ "8. Load the base model (`GPT2LMHeadModel`)."
  ]
 },
 {
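Step 8 loads the base GPT2LMHeadModel. Because the tokenizer gained new special tokens, the embedding matrix must be resized to match; the resize call below is the standard companion step, shown as an assumption since it is outside this diff. Continuing from the tokenizer sketch above:

from transformers import GPT2LMHeadModel

model = GPT2LMHeadModel.from_pretrained("pierreguillou/gpt2-small-portuguese")
model.resize_token_embeddings(len(tokenizer))  # account for the added special tokens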
@@ -961,7 +961,7 @@
  "id": "XAoXD7GVYMN_"
  },
  "source": [
- "10. Freeze some of the layers for constrained fine-tuning. This allows the model to retain some of its original capabilities after the tuning."
+ "9. Freeze some of the layers for constrained fine-tuning. This allows the model to retain some of its original capabilities after the tuning."
  ]
 },
 {
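Step 9 freezes part of the network so that fine-tuning only updates the upper layers, which helps the model retain its pretrained behavior. One common way to do this for GPT-2, continuing from the sketch above; the cut at block 6 of 12 is illustrative, since the notebook's exact split is not visible in this diff:

# Freeze the token/position embeddings and the lower half of the transformer blocks.
frozen = [model.transformer.wte, model.transformer.wpe, *model.transformer.h[:6]]
for module in frozen:
    for param in module.parameters():
        param.requires_grad = False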
@@ -1020,7 +1020,7 @@
  "id": "GJ1kH3uSe8rw"
  },
  "source": [
- "11. Set the training hyperparameters."
+ "10. Set the training hyperparameters."
  ]
 },
 {
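Step 10 sets the training hyperparameters. An illustrative setup with AdamW and a linear warmup schedule, continuing the running sketch; every value here is a placeholder rather than the notebook's choice:

from torch.optim import AdamW
from transformers import get_linear_schedule_with_warmup

epochs = 3            # placeholder
learning_rate = 5e-5  # placeholder

# Only optimize the parameters left unfrozen above.
optimizer = AdamW(
    (p for p in model.parameters() if p.requires_grad), lr=learning_rate
)
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=100,  # placeholder
    num_training_steps=len(dataloader) * epochs,
)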
@@ -1059,7 +1059,7 @@
  "id": "nz1a9lxXe8rw"
  },
  "source": [
- "12. Training/Validation loop. Track the carbon emissions of your work by using `codecarbon`. 🌱"
+ "11. Training/Validation loop. Track the carbon emissions of your work by using `codecarbon`. 🌱"
  ]
 },
 {
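Step 11 runs the training/validation loop inside a codecarbon tracker. A minimal sketch of how EmissionsTracker wraps a generic language-modeling loop, continuing the running sketch; the loop body is schematic (real code would also mask the padding tokens in the labels):

from codecarbon import EmissionsTracker

tracker = EmissionsTracker()
tracker.start()
try:
    model.train()
    for epoch in range(epochs):
        for (input_ids,) in dataloader:
            loss = model(input_ids=input_ids, labels=input_ids).loss
            loss.backward()
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()
finally:
    emissions = tracker.stop()  # estimated kg CO2-eq for the run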
@@ -1790,7 +1790,7 @@
  "id": "h48iOsqie8rx"
  },
  "source": [
- "13. Check the training stats and plot the learning curves."
+ "12. Check the training stats and plot the learning curves."
  ]
 },
 {
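Step 12 checks the training stats and plots the learning curves. Assuming per-epoch losses were appended to two lists during the loop above, a matplotlib sketch with placeholder numbers:

import matplotlib.pyplot as plt

train_losses = [2.9, 2.4, 2.1]  # placeholder values
val_losses = [3.0, 2.6, 2.4]    # placeholder values

plt.plot(train_losses, label="training loss")
plt.plot(val_losses, label="validation loss")
plt.xlabel("epoch")
plt.ylabel("loss")
plt.legend()
plt.show()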
 