nicholasKluge committed on
Commit 8e775fb
Parent: d996c32

Upload AIRA_FineTuning.ipynb

Files changed (1):
  1. AIRA_FineTuning.ipynb +12 -12
AIRA_FineTuning.ipynb CHANGED
@@ -758,14 +758,14 @@
  "source": [
  "from transformers import GPT2Tokenizer\n",
  "\n",
- "model = \"pierreguillou/gpt2-small-portuguese\" # \"gpt2\", \"gpt2-medium\", \"gpt2-large\", \"gpt2-xl\", \"pierreguillou/gpt2-small-portuguese\",\n",
- "model_size = \"PT-124M\" # \"124M\", \"355M\", \"774M\", and \"1.5B\", \"PT-124M\",\n",
+ "model = \"pierreguillou/gpt2-small-portuguese\" \n",
+ "model_size = \"PT-124M\" \n",
  "\n",
  "tokenizer = GPT2Tokenizer.from_pretrained(model,\n",
- " bos_token='<|startofinstruction|>', # '<|startoftext|>'\n",
+ " bos_token='<|startofinstruction|>', \n",
  " sep_token = '<|endofinstruction|>',\n",
- " eos_token='<|endofcompletion|>', # '<|endoftext|>'\n",
- " pad_token='<|pad|>') # '<|pad|>'\n",
+ " eos_token='<|endofcompletion|>', \n",
+ " pad_token='<|pad|>') \n",
  "\n",
  "df['demonstrations'] = tokenizer.bos_token + df['prompt'] + tokenizer.sep_token + df['completion'] + tokenizer.eos_token\n",
  "\n",
@@ -830,7 +830,7 @@
  "id": "vSqKuRjIe8ru"
  },
  "source": [
- "8. Create the `DataLoaders` and specify the `batch_size`."
+ "7. Create the `DataLoaders` and specify the `batch_size`."
  ]
 },
 {
@@ -846,7 +846,7 @@
  "dataloader = DataLoader(\n",
  " dataset,\n",
  " sampler=RandomSampler(dataset),\n",
- " batch_size=24, # 32, 20, 8, 4\n",
+ " batch_size=24, \n",
  " )"
  ]
 },
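The two hunks above renumber the step and pin batch_size to 24, dropping the alternative values that were parked in comments. A self-contained sketch of the same DataLoader construction; the TensorDataset of random token ids is a stand-in for the notebook's tokenized demonstrations:

import torch
from torch.utils.data import DataLoader, RandomSampler, TensorDataset

# Stand-in dataset: 96 sequences of 128 dummy token ids.
dataset = TensorDataset(torch.randint(0, 50257, (96, 128)))

dataloader = DataLoader(
    dataset,
    sampler=RandomSampler(dataset),  # reshuffles the demonstrations every epoch
    batch_size=24,                   # the value kept by this commit
)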
@@ -856,7 +856,7 @@
  "id": "0vxvcTIHe8rv"
  },
  "source": [
- "9. Load the base model (`GPT2LMHeadModel`)."
+ "8. Load the base model (`GPT2LMHeadModel`)."
  ]
 },
 {
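Step 8 loads the base GPT2LMHeadModel. Because the tokenizer gained new special tokens, the embedding matrix must be resized to match; the resize call below is the standard companion step, shown as an assumption since it is outside this diff. Continuing from the tokenizer sketch above:

from transformers import GPT2LMHeadModel

model = GPT2LMHeadModel.from_pretrained("pierreguillou/gpt2-small-portuguese")
model.resize_token_embeddings(len(tokenizer))  # account for the added special tokens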
@@ -961,7 +961,7 @@
  "id": "XAoXD7GVYMN_"
  },
  "source": [
- "10. Freeze some of the layers for constrained fine-tuning. This allows the model to retain some of its original capabilities after the tuning."
+ "9. Freeze some of the layers for constrained fine-tuning. This allows the model to retain some of its original capabilities after the tuning."
  ]
 },
 {
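Step 9 freezes part of the network so that fine-tuning only updates the upper layers, which helps the model retain its pretrained behavior. One common way to do this for GPT-2, continuing from the sketch above; the cut at block 6 of 12 is illustrative, since the notebook's exact split is not visible in this diff:

# Freeze the token/position embeddings and the lower half of the transformer blocks.
frozen = [model.transformer.wte, model.transformer.wpe, *model.transformer.h[:6]]
for module in frozen:
    for param in module.parameters():
        param.requires_grad = False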
@@ -1020,7 +1020,7 @@
  "id": "GJ1kH3uSe8rw"
  },
  "source": [
- "11. Set the training hyperparameters."
+ "10. Set the training hyperparameters."
  ]
 },
 {
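Step 10 sets the training hyperparameters. An illustrative setup with AdamW and a linear warmup schedule, continuing the running sketch; every value here is a placeholder rather than the notebook's choice:

from torch.optim import AdamW
from transformers import get_linear_schedule_with_warmup

epochs = 3            # placeholder
learning_rate = 5e-5  # placeholder

# Only optimize the parameters left unfrozen above.
optimizer = AdamW(
    (p for p in model.parameters() if p.requires_grad), lr=learning_rate
)
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=100,  # placeholder
    num_training_steps=len(dataloader) * epochs,
)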
@@ -1059,7 +1059,7 @@
  "id": "nz1a9lxXe8rw"
  },
  "source": [
- "12. Training/Validation loop. Track the carbon emissions of your work by using `codecarbon`. 🌱"
+ "11. Training/Validation loop. Track the carbon emissions of your work by using `codecarbon`. 🌱"
  ]
 },
 {
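Step 11 runs the training/validation loop inside a codecarbon tracker. A minimal sketch of how EmissionsTracker wraps a generic language-modeling loop, continuing the running sketch; the loop body is schematic (real code would also mask the padding tokens in the labels):

from codecarbon import EmissionsTracker

tracker = EmissionsTracker()
tracker.start()
try:
    model.train()
    for epoch in range(epochs):
        for (input_ids,) in dataloader:
            loss = model(input_ids=input_ids, labels=input_ids).loss
            loss.backward()
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()
finally:
    emissions = tracker.stop()  # estimated kg CO2-eq for the run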
@@ -1790,7 +1790,7 @@
  "id": "h48iOsqie8rx"
  },
  "source": [
- "13. Check the training stats and plot the learning curves."
+ "12. Check the training stats and plot the learning curves."
  ]
 },
 {
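Step 12 checks the training stats and plots the learning curves. Assuming per-epoch losses were appended to two lists during the loop above, a matplotlib sketch with placeholder numbers:

import matplotlib.pyplot as plt

train_losses = [2.9, 2.4, 2.1]  # placeholder values
val_losses = [3.0, 2.6, 2.4]    # placeholder values

plt.plot(train_losses, label="training loss")
plt.plot(val_losses, label="validation loss")
plt.xlabel("epoch")
plt.ylabel("loss")
plt.legend()
plt.show()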
 