crscardellino
committed
Commit 8cbe4b9
Parent(s): f781de0

Updated the notebook

flisol-cordoba-2023.ipynb CHANGED (+44 -226)
@@ -258,7 +258,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": null,
    "id": "0e0d53be",
    "metadata": {
     "slideshow": {
@@ -276,7 +276,9 @@
     "\n",
     "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
     "tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)\n",
-    "model = AutoModelForCausalLM.from_pretrained(
+    "model = AutoModelForCausalLM.from_pretrained(\n",
+    " BASE_MODEL, torch_dtype=\"auto\"\n",
+    ").to(device)"
    ]
   },
   {
@@ -339,7 +341,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": null,
    "id": "c1227c49",
    "metadata": {
     "slideshow": {
@@ -378,32 +380,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": null,
    "id": "11bec6de",
    "metadata": {
     "slideshow": {
      "slide_type": "fragment"
     }
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "La siguiente es una conversación entre un HUMANO y un bot EXPERTO en software libre.\n",
-      "El EXPERTO le ayuda al HUMANO con preguntas acerca de software libre.\n",
-      "El EXPERTO es conversacional, optimista, flexible, creativo y genera respuestas parecidas a un humano.\n",
-      "\n",
-      "HUMANO: Hola, ¿Cómo estás?\n",
-      "EXPERTO: Hola, pmuy bien. Estoy acá para ayudarte con preguntas respecto al software libre.\n",
-      "\n",
-      "HUMANO: ¿Qué es el software libre?\n",
-      "EXPERTO: El software libre es un software que se puede modificar, redistribuir y distribuir libremente.\n",
-      "HUMANO: ¿En qué consiste la licencia GPL?\n",
-      "EXPERTO: La licencia GPL es una licencia de software libre que permite a los usuarios modificar, redistribuir\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "MAX_TOKENS = 50\n",
     "input_ids = tokenizer.encode(PROMPT, return_tensors=\"pt\").to(device)\n",
@@ -433,33 +417,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": null,
    "id": "dc66f288",
    "metadata": {
     "slideshow": {
      "slide_type": "fragment"
     }
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "La siguiente es una conversación entre un HUMANO y un bot EXPERTO en software libre.\n",
-      "El EXPERTO le ayuda al HUMANO con preguntas acerca de software libre.\n",
-      "El EXPERTO es conversacional, optimista, flexible, creativo y genera respuestas parecidas a un humano.\n",
-      "\n",
-      "HUMANO: Hola, ¿Cómo estás?\n",
-      "EXPERTO: Hola, pmuy bien. Estoy acá para ayudarte con preguntas respecto al software libre.\n",
-      "\n",
-      "HUMANO: ¿Qué es el software libre?\n",
-      "EXPERTO: El software libre, es aquel software que esta escrito en un lenguaje de programación que puede ser modificado y copiado por cualquier persona o entidad.\n",
-      "\n",
-      "HUMANO: ¿En general cuáles son los usos que se pueden dar a un software libre?\n",
-      "EXPERTO\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "torch.manual_seed(42) # To ensure determinism\n",
     "\n",
@@ -604,7 +569,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": null,
    "id": "17f2884d",
    "metadata": {
     "slideshow": {
@@ -616,6 +581,8 @@
     "import torch\n",
     "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
     "\n",
+    "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
+    "\n",
     "BASE_MODEL = \"DeepESP/gpt2-spanish\" # We play with a smaller model\n",
     "tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)\n",
     "model = AutoModelForCausalLM.from_pretrained(BASE_MODEL).to(device)"
@@ -637,26 +604,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": null,
    "id": "322a4a9b",
    "metadata": {
     "slideshow": {
      "slide_type": "fragment"
     }
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Aquí me pongo a cantar y a llorar. \n",
-      "\n",
-      "Los sollozos de Meggie se desvanecen por la noche en el salón. Al parecer no se ve nada. \n",
-      "\n",
-      "—¿Y si no fuera el final del mundo, el fin de un mundo?\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "torch.manual_seed(42) # To ensure determinism\n",
     "\n",
@@ -686,30 +641,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": null,
    "id": "5a27197e",
    "metadata": {
     "slideshow": {
      "slide_type": "fragment"
     }
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "I - Cantor y Gaucho.\n",
-      "\n",
-      "1\n",
-      "Aquí me pongo a cantar\n",
-      "Al compás de la vigüela,\n",
-      "Que el hombre que lo desvela\n",
-      "Una pena estraordinaria\n",
-      "Como la ave solitaria\n",
-      "Con el cantar se consuela.\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "from datasets import load_dataset\n",
     "\n",
@@ -740,7 +679,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": null,
    "id": "33059c5f",
    "metadata": {
     "scrolled": true,
@@ -778,7 +717,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": null,
    "id": "3100e195",
    "metadata": {
     "scrolled": true,
@@ -817,23 +756,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": null,
    "id": "b9d33b7b",
    "metadata": {
     "slideshow": {
      "slide_type": "fragment"
     }
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "128\n",
-      "[50, 1368, 6505, 282, 324, 24275, 526, 23, 208, 208]\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "print(len(lm_datasets[\"train\"][0][\"input_ids\"]))\n",
     "print(lm_datasets[\"train\"][0][\"input_ids\"][:10])"
@@ -841,44 +771,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": null,
    "id": "7dfb316d",
    "metadata": {
     "slideshow": {
      "slide_type": "fragment"
     }
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "I - Cantor y Gaucho.\n",
-      "\n",
-      "1\n",
-      "Aquí me pongo a cantar\n",
-      "Al compás de la vigüela,\n",
-      "Que el hombre que lo desvela\n",
-      "Una pena estraordinaria\n",
-      "Como la ave solitaria\n",
-      "Con el cantar se consuela.\n",
-      "\n",
-      "2\n",
-      "Pido a los Santos del Cielo\n",
-      "Que ayuden mi pensamiento;\n",
-      "Les pido en este momento\n",
-      "Que voy a cantar mi historia\n",
-      "Me refresquen la memoria\n",
-      "Y aclaren mi entendimiento.\n",
-      "\n",
-      "3\n",
-      "Vengan Santos milagrosos,\n",
-      "Vengan todos en mi ayuda,\n",
-      "Que la lengua se me añuda\n",
-      "Y se me turba\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "print(tokenizer.decode(lm_datasets[\"train\"][0][\"input_ids\"]))"
    ]
@@ -900,7 +800,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": null,
    "id": "a8b90ba2",
    "metadata": {
     "scrolled": true,
@@ -935,100 +835,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
-   "id": "
+   "execution_count": null,
+   "id": "ccd8e608-7e14-4796-9e52-c55b6df3ce6f",
    "metadata": {
     "slideshow": {
      "slide_type": "subslide"
     }
    },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "\n",
-       " <div>\n",
-       " \n",
-       " <progress value='180' max='180' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
-       " [180/180 11:44, Epoch 10/10]\n",
-       " </div>\n",
-       " <table border=\"1\" class=\"dataframe\">\n",
-       " <thead>\n",
-       " <tr style=\"text-align: left;\">\n",
-       " <th>Epoch</th>\n",
-       " <th>Training Loss</th>\n",
-       " <th>Validation Loss</th>\n",
-       " </tr>\n",
-       " </thead>\n",
-       " <tbody>\n",
-       " <tr>\n",
-       " <td>1</td>\n",
-       " <td>4.386400</td>\n",
-       " <td>4.202457</td>\n",
-       " </tr>\n",
-       " <tr>\n",
-       " <td>2</td>\n",
-       " <td>3.948000</td>\n",
-       " <td>4.043974</td>\n",
-       " </tr>\n",
-       " <tr>\n",
-       " <td>3</td>\n",
-       " <td>3.796200</td>\n",
-       " <td>3.980350</td>\n",
-       " </tr>\n",
-       " <tr>\n",
-       " <td>4</td>\n",
-       " <td>3.610500</td>\n",
-       " <td>3.945783</td>\n",
-       " </tr>\n",
-       " <tr>\n",
-       " <td>5</td>\n",
-       " <td>3.444400</td>\n",
-       " <td>3.927984</td>\n",
-       " </tr>\n",
-       " <tr>\n",
-       " <td>6</td>\n",
-       " <td>3.385500</td>\n",
-       " <td>3.919229</td>\n",
-       " </tr>\n",
-       " <tr>\n",
-       " <td>7</td>\n",
-       " <td>3.314200</td>\n",
-       " <td>3.909090</td>\n",
-       " </tr>\n",
-       " <tr>\n",
-       " <td>8</td>\n",
-       " <td>3.219200</td>\n",
-       " <td>3.907399</td>\n",
-       " </tr>\n",
-       " <tr>\n",
-       " <td>9</td>\n",
-       " <td>3.161500</td>\n",
-       " <td>3.906959</td>\n",
-       " </tr>\n",
-       " <tr>\n",
-       " <td>10</td>\n",
-       " <td>3.163700</td>\n",
-       " <td>3.906726</td>\n",
-       " </tr>\n",
-       " </tbody>\n",
-       "</table><p>"
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
    "source": [
     "from transformers import Trainer, TrainingArguments\n",
     "\n",
     "training_args = TrainingArguments(\n",
     " \"flisol-cba-martin-fierro\",\n",
-    "
-    " num_train_epochs=
+    " eval_strategy=\"epoch\",\n",
+    " num_train_epochs=25,\n",
     " learning_rate=2e-5,\n",
     " weight_decay=0.01,\n",
     " logging_steps=5,\n",
@@ -1041,7 +862,20 @@
     " eval_dataset=lm_datasets[\"validation\"],\n",
     ")\n",
     "\n",
-    "trainer.train()
+    "trainer.train();"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "16695fc7-9baa-41f7-9367-d8fbcd987b79",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "subslide"
+    }
+   },
+   "outputs": [],
+   "source": [
     "trainer.push_to_hub() # This pushes the trained model to Hugging Face model repository\n",
     "tokenizer.push_to_hub(\"flisol-cba-martin-fierro\")"
    ]
@@ -1064,30 +898,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": null,
    "id": "6a35e80f",
    "metadata": {
     "slideshow": {
      "slide_type": "fragment"
     }
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Aquí me pongo a cantar;\n",
-      "y si tengo el sueño:\n",
-      "de pronto se me ha quedado la sangre:\n",
-      "como te asombre se me\n",
-      "lo oí decir muchas veces,\n",
-      "pero el tiempo me ha borrado.\n",
-      "\n",
-      "2\n",
-      "Soy\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "import torch\n",
     "from transformers import AutoModelForCausalLM, AutoTokenizer\n",