crscardellino committed on
Commit
1f5c8b8
1 Parent(s): dd15d4a

Temporary commit to save training output

Browse files
Files changed (1) hide show
  1. flisol-cordoba-2023.ipynb +256 -21
flisol-cordoba-2023.ipynb CHANGED
@@ -556,7 +556,7 @@
556
  },
557
  {
558
  "cell_type": "code",
559
- "execution_count": null,
560
  "id": "17f2884d",
561
  "metadata": {
562
  "slideshow": {
@@ -589,14 +589,34 @@
589
  },
590
  {
591
  "cell_type": "code",
592
- "execution_count": null,
593
  "id": "322a4a9b",
594
  "metadata": {
595
  "slideshow": {
596
  "slide_type": "fragment"
597
  }
598
  },
599
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
600
  "source": [
601
  "torch.manual_seed(42) # To ensure determinism\n",
602
  "\n",
@@ -626,14 +646,51 @@
626
  },
627
  {
628
  "cell_type": "code",
629
- "execution_count": null,
630
  "id": "5a27197e",
631
  "metadata": {
632
  "slideshow": {
633
  "slide_type": "fragment"
634
  }
635
  },
636
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
637
  "source": [
638
  "from datasets import load_dataset\n",
639
  "\n",
@@ -659,7 +716,7 @@
659
  },
660
  {
661
  "cell_type": "code",
662
- "execution_count": null,
663
  "id": "33059c5f",
664
  "metadata": {
665
  "scrolled": true,
@@ -667,7 +724,16 @@
667
  "slide_type": "fragment"
668
  }
669
  },
670
- "outputs": [],
 
 
 
 
 
 
 
 
 
671
  "source": [
672
  "from utils import tokenize # local module in the repository\n",
673
  "\n",
@@ -695,7 +761,7 @@
695
  },
696
  {
697
  "cell_type": "code",
698
- "execution_count": null,
699
  "id": "3100e195",
700
  "metadata": {
701
  "scrolled": true,
@@ -703,7 +769,16 @@
703
  "slide_type": "fragment"
704
  }
705
  },
706
- "outputs": [],
 
 
 
 
 
 
 
 
 
707
  "source": [
708
  "from functools import partial\n",
709
  "from utils import group_texts # local module in the repository\n",
@@ -734,14 +809,23 @@
734
  },
735
  {
736
  "cell_type": "code",
737
- "execution_count": null,
738
  "id": "b9d33b7b",
739
  "metadata": {
740
  "slideshow": {
741
  "slide_type": "fragment"
742
  }
743
  },
744
- "outputs": [],
 
 
 
 
 
 
 
 
 
745
  "source": [
746
  "print(len(lm_datasets['train'][0]['input_ids']))\n",
747
  "print(lm_datasets['train'][0]['input_ids'][:10])"
@@ -749,7 +833,7 @@
749
  },
750
  {
751
  "cell_type": "code",
752
- "execution_count": null,
753
  "id": "7dfb316d",
754
  "metadata": {
755
  "scrolled": false,
@@ -757,7 +841,37 @@
757
  "slide_type": "fragment"
758
  }
759
  },
760
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
761
  "source": [
762
  "print(tokenizer.decode(lm_datasets[\"train\"][0][\"input_ids\"]))"
763
  ]
@@ -779,14 +893,25 @@
779
  },
780
  {
781
  "cell_type": "code",
782
- "execution_count": null,
783
  "id": "a8b90ba2",
784
  "metadata": {
 
785
  "slideshow": {
786
  "slide_type": "fragment"
787
  }
788
  },
789
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
790
  "source": [
791
  "from huggingface_hub import notebook_login\n",
792
  "\n",
@@ -813,21 +938,118 @@
813
  },
814
  {
815
  "cell_type": "code",
816
- "execution_count": null,
817
- "id": "d43c5555",
818
  "metadata": {
819
  "slideshow": {
820
  "slide_type": "subslide"
821
  }
822
  },
823
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
824
  "source": [
825
  "from transformers import Trainer, TrainingArguments\n",
826
  "\n",
827
  "training_args = TrainingArguments(\n",
828
- " \"flisol-cba-martinfierro\",\n",
829
  " evaluation_strategy=\"epoch\",\n",
830
- " num_train_epochs=15,\n",
831
  " learning_rate=2e-5,\n",
832
  " weight_decay=0.01,\n",
833
  " logging_steps=5\n",
@@ -840,7 +1062,20 @@
840
  " eval_dataset=lm_datasets[\"validation\"]\n",
841
  ")\n",
842
  "\n",
843
- "trainer.train()\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
844
  "trainer.push_to_hub() # This pushes the trained model to Hugging Face model repository"
845
  ]
846
  },
 
556
  },
557
  {
558
  "cell_type": "code",
559
+ "execution_count": 1,
560
  "id": "17f2884d",
561
  "metadata": {
562
  "slideshow": {
 
589
  },
590
  {
591
  "cell_type": "code",
592
+ "execution_count": 2,
593
  "id": "322a4a9b",
594
  "metadata": {
595
  "slideshow": {
596
  "slide_type": "fragment"
597
  }
598
  },
599
+ "outputs": [
600
+ {
601
+ "name": "stderr",
602
+ "output_type": "stream",
603
+ "text": [
604
+ "The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
605
+ "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n"
606
+ ]
607
+ },
608
+ {
609
+ "name": "stdout",
610
+ "output_type": "stream",
611
+ "text": [
612
+ "Aquí me pongo a cantar y a llorar. \n",
613
+ "\n",
614
+ "Los sollozos de Meggie se desvanecen por la noche en el salón. Al parecer no se ve nada. \n",
615
+ "\n",
616
+ "—¿Y si no fuera el final del mundo, el fin de un mundo?\n"
617
+ ]
618
+ }
619
+ ],
620
  "source": [
621
  "torch.manual_seed(42) # To ensure determinism\n",
622
  "\n",
 
646
  },
647
  {
648
  "cell_type": "code",
649
+ "execution_count": 3,
650
  "id": "5a27197e",
651
  "metadata": {
652
  "slideshow": {
653
  "slide_type": "fragment"
654
  }
655
  },
656
+ "outputs": [
657
+ {
658
+ "name": "stderr",
659
+ "output_type": "stream",
660
+ "text": [
661
+ "Found cached dataset text (/home/crscardellino/.cache/huggingface/datasets/text/default-623d9572e8f69157/0.0.0/cb1e9bd71a82ad27976be3b12b407850fe2837d80c22c5e03a28949843a8ace2)\n"
662
+ ]
663
+ },
664
+ {
665
+ "data": {
666
+ "application/vnd.jupyter.widget-view+json": {
667
+ "model_id": "0fe0bb8953f24e05b2a56ad08c462976",
668
+ "version_major": 2,
669
+ "version_minor": 0
670
+ },
671
+ "text/plain": [
672
+ " 0%| | 0/2 [00:00<?, ?it/s]"
673
+ ]
674
+ },
675
+ "metadata": {},
676
+ "output_type": "display_data"
677
+ },
678
+ {
679
+ "name": "stdout",
680
+ "output_type": "stream",
681
+ "text": [
682
+ "I - Cantor y Gaucho.\n",
683
+ "\n",
684
+ "1\n",
685
+ "Aquí me pongo a cantar\n",
686
+ "Al compás de la vigüela,\n",
687
+ "Que el hombre que lo desvela\n",
688
+ "Una pena estraordinaria\n",
689
+ "Como la ave solitaria\n",
690
+ "Con el cantar se consuela.\n"
691
+ ]
692
+ }
693
+ ],
694
  "source": [
695
  "from datasets import load_dataset\n",
696
  "\n",
 
716
  },
717
  {
718
  "cell_type": "code",
719
+ "execution_count": 4,
720
  "id": "33059c5f",
721
  "metadata": {
722
  "scrolled": true,
 
724
  "slide_type": "fragment"
725
  }
726
  },
727
+ "outputs": [
728
+ {
729
+ "name": "stderr",
730
+ "output_type": "stream",
731
+ "text": [
732
+ "Loading cached processed dataset at /home/crscardellino/.cache/huggingface/datasets/text/default-623d9572e8f69157/0.0.0/cb1e9bd71a82ad27976be3b12b407850fe2837d80c22c5e03a28949843a8ace2/cache-5a0f77d99160fc1c_*_of_00004.arrow\n",
733
+ "Loading cached processed dataset at /home/crscardellino/.cache/huggingface/datasets/text/default-623d9572e8f69157/0.0.0/cb1e9bd71a82ad27976be3b12b407850fe2837d80c22c5e03a28949843a8ace2/cache-003d85e2eebe3231_*_of_00004.arrow\n"
734
+ ]
735
+ }
736
+ ],
737
  "source": [
738
  "from utils import tokenize # local module in the repository\n",
739
  "\n",
 
761
  },
762
  {
763
  "cell_type": "code",
764
+ "execution_count": 5,
765
  "id": "3100e195",
766
  "metadata": {
767
  "scrolled": true,
 
769
  "slide_type": "fragment"
770
  }
771
  },
772
+ "outputs": [
773
+ {
774
+ "name": "stderr",
775
+ "output_type": "stream",
776
+ "text": [
777
+ "Loading cached processed dataset at /home/crscardellino/.cache/huggingface/datasets/text/default-623d9572e8f69157/0.0.0/cb1e9bd71a82ad27976be3b12b407850fe2837d80c22c5e03a28949843a8ace2/cache-01936c1905752293_*_of_00004.arrow\n",
778
+ "Loading cached processed dataset at /home/crscardellino/.cache/huggingface/datasets/text/default-623d9572e8f69157/0.0.0/cb1e9bd71a82ad27976be3b12b407850fe2837d80c22c5e03a28949843a8ace2/cache-af8dcd60a546c28d_*_of_00004.arrow\n"
779
+ ]
780
+ }
781
+ ],
782
  "source": [
783
  "from functools import partial\n",
784
  "from utils import group_texts # local module in the repository\n",
 
809
  },
810
  {
811
  "cell_type": "code",
812
+ "execution_count": 6,
813
  "id": "b9d33b7b",
814
  "metadata": {
815
  "slideshow": {
816
  "slide_type": "fragment"
817
  }
818
  },
819
+ "outputs": [
820
+ {
821
+ "name": "stdout",
822
+ "output_type": "stream",
823
+ "text": [
824
+ "128\n",
825
+ "[50, 1368, 6505, 282, 324, 24275, 526, 23, 208, 208]\n"
826
+ ]
827
+ }
828
+ ],
829
  "source": [
830
  "print(len(lm_datasets['train'][0]['input_ids']))\n",
831
  "print(lm_datasets['train'][0]['input_ids'][:10])"
 
833
  },
834
  {
835
  "cell_type": "code",
836
+ "execution_count": 7,
837
  "id": "7dfb316d",
838
  "metadata": {
839
  "scrolled": false,
 
841
  "slide_type": "fragment"
842
  }
843
  },
844
+ "outputs": [
845
+ {
846
+ "name": "stdout",
847
+ "output_type": "stream",
848
+ "text": [
849
+ "I - Cantor y Gaucho.\n",
850
+ "\n",
851
+ "1\n",
852
+ "Aquí me pongo a cantar\n",
853
+ "Al compás de la vigüela,\n",
854
+ "Que el hombre que lo desvela\n",
855
+ "Una pena estraordinaria\n",
856
+ "Como la ave solitaria\n",
857
+ "Con el cantar se consuela.\n",
858
+ "\n",
859
+ "2\n",
860
+ "Pido a los Santos del Cielo\n",
861
+ "Que ayuden mi pensamiento;\n",
862
+ "Les pido en este momento\n",
863
+ "Que voy a cantar mi historia\n",
864
+ "Me refresquen la memoria\n",
865
+ "Y aclaren mi entendimiento.\n",
866
+ "\n",
867
+ "3\n",
868
+ "Vengan Santos milagrosos,\n",
869
+ "Vengan todos en mi ayuda,\n",
870
+ "Que la lengua se me añuda\n",
871
+ "Y se me turba\n"
872
+ ]
873
+ }
874
+ ],
875
  "source": [
876
  "print(tokenizer.decode(lm_datasets[\"train\"][0][\"input_ids\"]))"
877
  ]
 
893
  },
894
  {
895
  "cell_type": "code",
896
+ "execution_count": 8,
897
  "id": "a8b90ba2",
898
  "metadata": {
899
+ "scrolled": true,
900
  "slideshow": {
901
  "slide_type": "fragment"
902
  }
903
  },
904
+ "outputs": [
905
+ {
906
+ "name": "stdout",
907
+ "output_type": "stream",
908
+ "text": [
909
+ "Token is valid.\n",
910
+ "Your token has been saved to /home/crscardellino/.cache/huggingface/token\n",
911
+ "Login successful\n"
912
+ ]
913
+ }
914
+ ],
915
  "source": [
916
  "from huggingface_hub import notebook_login\n",
917
  "\n",
 
938
  },
939
  {
940
  "cell_type": "code",
941
+ "execution_count": 9,
942
+ "id": "3b121d21",
943
  "metadata": {
944
  "slideshow": {
945
  "slide_type": "subslide"
946
  }
947
  },
948
+ "outputs": [
949
+ {
950
+ "name": "stderr",
951
+ "output_type": "stream",
952
+ "text": [
953
+ "/home/crscardellino/Projects/research/flisol/flisol-cba-martin-fierro/venv/lib/python3.10/site-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
954
+ " warnings.warn(\n"
955
+ ]
956
+ },
957
+ {
958
+ "data": {
959
+ "text/html": [
960
+ "\n",
961
+ " <div>\n",
962
+ " \n",
963
+ " <progress value='180' max='180' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
964
+ " [180/180 11:44, Epoch 10/10]\n",
965
+ " </div>\n",
966
+ " <table border=\"1\" class=\"dataframe\">\n",
967
+ " <thead>\n",
968
+ " <tr style=\"text-align: left;\">\n",
969
+ " <th>Epoch</th>\n",
970
+ " <th>Training Loss</th>\n",
971
+ " <th>Validation Loss</th>\n",
972
+ " </tr>\n",
973
+ " </thead>\n",
974
+ " <tbody>\n",
975
+ " <tr>\n",
976
+ " <td>1</td>\n",
977
+ " <td>4.386400</td>\n",
978
+ " <td>4.202457</td>\n",
979
+ " </tr>\n",
980
+ " <tr>\n",
981
+ " <td>2</td>\n",
982
+ " <td>3.948000</td>\n",
983
+ " <td>4.043974</td>\n",
984
+ " </tr>\n",
985
+ " <tr>\n",
986
+ " <td>3</td>\n",
987
+ " <td>3.796200</td>\n",
988
+ " <td>3.980350</td>\n",
989
+ " </tr>\n",
990
+ " <tr>\n",
991
+ " <td>4</td>\n",
992
+ " <td>3.610500</td>\n",
993
+ " <td>3.945783</td>\n",
994
+ " </tr>\n",
995
+ " <tr>\n",
996
+ " <td>5</td>\n",
997
+ " <td>3.444400</td>\n",
998
+ " <td>3.927984</td>\n",
999
+ " </tr>\n",
1000
+ " <tr>\n",
1001
+ " <td>6</td>\n",
1002
+ " <td>3.385500</td>\n",
1003
+ " <td>3.919229</td>\n",
1004
+ " </tr>\n",
1005
+ " <tr>\n",
1006
+ " <td>7</td>\n",
1007
+ " <td>3.314200</td>\n",
1008
+ " <td>3.909090</td>\n",
1009
+ " </tr>\n",
1010
+ " <tr>\n",
1011
+ " <td>8</td>\n",
1012
+ " <td>3.219200</td>\n",
1013
+ " <td>3.907399</td>\n",
1014
+ " </tr>\n",
1015
+ " <tr>\n",
1016
+ " <td>9</td>\n",
1017
+ " <td>3.161500</td>\n",
1018
+ " <td>3.906959</td>\n",
1019
+ " </tr>\n",
1020
+ " <tr>\n",
1021
+ " <td>10</td>\n",
1022
+ " <td>3.163700</td>\n",
1023
+ " <td>3.906726</td>\n",
1024
+ " </tr>\n",
1025
+ " </tbody>\n",
1026
+ "</table><p>"
1027
+ ],
1028
+ "text/plain": [
1029
+ "<IPython.core.display.HTML object>"
1030
+ ]
1031
+ },
1032
+ "metadata": {},
1033
+ "output_type": "display_data"
1034
+ },
1035
+ {
1036
+ "data": {
1037
+ "text/plain": [
1038
+ "TrainOutput(global_step=180, training_loss=3.5808190133836533, metrics={'train_runtime': 707.4357, 'train_samples_per_second': 1.951, 'train_steps_per_second': 0.254, 'total_flos': 90145751040000.0, 'train_loss': 3.5808190133836533, 'epoch': 10.0})"
1039
+ ]
1040
+ },
1041
+ "execution_count": 9,
1042
+ "metadata": {},
1043
+ "output_type": "execute_result"
1044
+ }
1045
+ ],
1046
  "source": [
1047
  "from transformers import Trainer, TrainingArguments\n",
1048
  "\n",
1049
  "training_args = TrainingArguments(\n",
1050
+ " \"flisol-cba-martin-fierro\",\n",
1051
  " evaluation_strategy=\"epoch\",\n",
1052
+ " num_train_epochs=10,\n",
1053
  " learning_rate=2e-5,\n",
1054
  " weight_decay=0.01,\n",
1055
  " logging_steps=5\n",
 
1062
  " eval_dataset=lm_datasets[\"validation\"]\n",
1063
  ")\n",
1064
  "\n",
1065
+ "trainer.train()"
1066
+ ]
1067
+ },
1068
+ {
1069
+ "cell_type": "code",
1070
+ "execution_count": null,
1071
+ "id": "d43c5555",
1072
+ "metadata": {
1073
+ "slideshow": {
1074
+ "slide_type": "-"
1075
+ }
1076
+ },
1077
+ "outputs": [],
1078
+ "source": [
1079
  "trainer.push_to_hub() # This pushes the trained model to Hugging Face model repository"
1080
  ]
1081
  },