Commit
•
b161832
1
Parent(s):
184726d
Finished presentation notebook
Browse files- flisol-cordoba-2023.ipynb +80 -88
flisol-cordoba-2023.ipynb
CHANGED
@@ -261,7 +261,7 @@
|
|
261 |
},
|
262 |
{
|
263 |
"cell_type": "code",
|
264 |
-
"execution_count":
|
265 |
"id": "0e0d53be",
|
266 |
"metadata": {
|
267 |
"slideshow": {
|
@@ -330,7 +330,7 @@
|
|
330 |
},
|
331 |
{
|
332 |
"cell_type": "code",
|
333 |
-
"execution_count":
|
334 |
"id": "c1227c49",
|
335 |
"metadata": {
|
336 |
"slideshow": {
|
@@ -369,14 +369,32 @@
|
|
369 |
},
|
370 |
{
|
371 |
"cell_type": "code",
|
372 |
-
"execution_count":
|
373 |
"id": "11bec6de",
|
374 |
"metadata": {
|
375 |
"slideshow": {
|
376 |
"slide_type": "fragment"
|
377 |
}
|
378 |
},
|
379 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
380 |
"source": [
|
381 |
"MAX_TOKENS = 50\n",
|
382 |
"input_ids = tokenizer.encode(PROMPT, return_tensors='pt')\n",
|
@@ -406,14 +424,33 @@
|
|
406 |
},
|
407 |
{
|
408 |
"cell_type": "code",
|
409 |
-
"execution_count":
|
410 |
"id": "dc66f288",
|
411 |
"metadata": {
|
412 |
"slideshow": {
|
413 |
"slide_type": "fragment"
|
414 |
}
|
415 |
},
|
416 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
417 |
"source": [
|
418 |
"torch.manual_seed(42) # To ensure determinism\n",
|
419 |
"\n",
|
@@ -473,8 +510,7 @@
|
|
473 |
"\"\"\".strip()\n",
|
474 |
"\n",
|
475 |
"chatbot = ChatBot(\n",
|
476 |
-
" base_model=
|
477 |
-
" tokenizer=tokenizer,\n",
|
478 |
" initial_prompt=PROMPT,\n",
|
479 |
" keep_context=True,\n",
|
480 |
" creative=True,\n",
|
@@ -597,14 +633,6 @@
|
|
597 |
}
|
598 |
},
|
599 |
"outputs": [
|
600 |
-
{
|
601 |
-
"name": "stderr",
|
602 |
-
"output_type": "stream",
|
603 |
-
"text": [
|
604 |
-
"The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n",
|
605 |
-
"Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n"
|
606 |
-
]
|
607 |
-
},
|
608 |
{
|
609 |
"name": "stdout",
|
610 |
"output_type": "stream",
|
@@ -654,17 +682,10 @@
|
|
654 |
}
|
655 |
},
|
656 |
"outputs": [
|
657 |
-
{
|
658 |
-
"name": "stderr",
|
659 |
-
"output_type": "stream",
|
660 |
-
"text": [
|
661 |
-
"Found cached dataset text (/home/crscardellino/.cache/huggingface/datasets/text/default-623d9572e8f69157/0.0.0/cb1e9bd71a82ad27976be3b12b407850fe2837d80c22c5e03a28949843a8ace2)\n"
|
662 |
-
]
|
663 |
-
},
|
664 |
{
|
665 |
"data": {
|
666 |
"application/vnd.jupyter.widget-view+json": {
|
667 |
-
"model_id": "
|
668 |
"version_major": 2,
|
669 |
"version_minor": 0
|
670 |
},
|
@@ -724,16 +745,7 @@
|
|
724 |
"slide_type": "fragment"
|
725 |
}
|
726 |
},
|
727 |
-
"outputs": [
|
728 |
-
{
|
729 |
-
"name": "stderr",
|
730 |
-
"output_type": "stream",
|
731 |
-
"text": [
|
732 |
-
"Loading cached processed dataset at /home/crscardellino/.cache/huggingface/datasets/text/default-623d9572e8f69157/0.0.0/cb1e9bd71a82ad27976be3b12b407850fe2837d80c22c5e03a28949843a8ace2/cache-5a0f77d99160fc1c_*_of_00004.arrow\n",
|
733 |
-
"Loading cached processed dataset at /home/crscardellino/.cache/huggingface/datasets/text/default-623d9572e8f69157/0.0.0/cb1e9bd71a82ad27976be3b12b407850fe2837d80c22c5e03a28949843a8ace2/cache-003d85e2eebe3231_*_of_00004.arrow\n"
|
734 |
-
]
|
735 |
-
}
|
736 |
-
],
|
737 |
"source": [
|
738 |
"from utils import tokenize # local module in the repository\n",
|
739 |
"\n",
|
@@ -769,16 +781,7 @@
|
|
769 |
"slide_type": "fragment"
|
770 |
}
|
771 |
},
|
772 |
-
"outputs": [
|
773 |
-
{
|
774 |
-
"name": "stderr",
|
775 |
-
"output_type": "stream",
|
776 |
-
"text": [
|
777 |
-
"Loading cached processed dataset at /home/crscardellino/.cache/huggingface/datasets/text/default-623d9572e8f69157/0.0.0/cb1e9bd71a82ad27976be3b12b407850fe2837d80c22c5e03a28949843a8ace2/cache-01936c1905752293_*_of_00004.arrow\n",
|
778 |
-
"Loading cached processed dataset at /home/crscardellino/.cache/huggingface/datasets/text/default-623d9572e8f69157/0.0.0/cb1e9bd71a82ad27976be3b12b407850fe2837d80c22c5e03a28949843a8ace2/cache-af8dcd60a546c28d_*_of_00004.arrow\n"
|
779 |
-
]
|
780 |
-
}
|
781 |
-
],
|
782 |
"source": [
|
783 |
"from functools import partial\n",
|
784 |
"from utils import group_texts # local module in the repository\n",
|
@@ -903,13 +906,18 @@
|
|
903 |
},
|
904 |
"outputs": [
|
905 |
{
|
906 |
-
"
|
907 |
-
|
908 |
-
|
909 |
-
|
910 |
-
|
911 |
-
|
912 |
-
|
|
|
|
|
|
|
|
|
|
|
913 |
}
|
914 |
],
|
915 |
"source": [
|
@@ -946,14 +954,6 @@
|
|
946 |
}
|
947 |
},
|
948 |
"outputs": [
|
949 |
-
{
|
950 |
-
"name": "stderr",
|
951 |
-
"output_type": "stream",
|
952 |
-
"text": [
|
953 |
-
"/home/crscardellino/Projects/research/flisol/flisol-cba-martin-fierro/venv/lib/python3.10/site-packages/transformers/optimization.py:391: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
|
954 |
-
" warnings.warn(\n"
|
955 |
-
]
|
956 |
-
},
|
957 |
{
|
958 |
"data": {
|
959 |
"text/html": [
|
@@ -1031,16 +1031,6 @@
|
|
1031 |
},
|
1032 |
"metadata": {},
|
1033 |
"output_type": "display_data"
|
1034 |
-
},
|
1035 |
-
{
|
1036 |
-
"data": {
|
1037 |
-
"text/plain": [
|
1038 |
-
"TrainOutput(global_step=180, training_loss=3.5808190133836533, metrics={'train_runtime': 707.4357, 'train_samples_per_second': 1.951, 'train_steps_per_second': 0.254, 'total_flos': 90145751040000.0, 'train_loss': 3.5808190133836533, 'epoch': 10.0})"
|
1039 |
-
]
|
1040 |
-
},
|
1041 |
-
"execution_count": 9,
|
1042 |
-
"metadata": {},
|
1043 |
-
"output_type": "execute_result"
|
1044 |
}
|
1045 |
],
|
1046 |
"source": [
|
@@ -1062,20 +1052,7 @@
|
|
1062 |
" eval_dataset=lm_datasets[\"validation\"]\n",
|
1063 |
")\n",
|
1064 |
"\n",
|
1065 |
-
"trainer.train()"
|
1066 |
-
]
|
1067 |
-
},
|
1068 |
-
{
|
1069 |
-
"cell_type": "code",
|
1070 |
-
"execution_count": null,
|
1071 |
-
"id": "d43c5555",
|
1072 |
-
"metadata": {
|
1073 |
-
"slideshow": {
|
1074 |
-
"slide_type": "-"
|
1075 |
-
}
|
1076 |
-
},
|
1077 |
-
"outputs": [],
|
1078 |
-
"source": [
|
1079 |
"trainer.push_to_hub() # This pushes the trained model to Hugging Face model repository"
|
1080 |
]
|
1081 |
},
|
@@ -1097,21 +1074,36 @@
|
|
1097 |
},
|
1098 |
{
|
1099 |
"cell_type": "code",
|
1100 |
-
"execution_count":
|
1101 |
"id": "6a35e80f",
|
1102 |
"metadata": {
|
1103 |
"slideshow": {
|
1104 |
"slide_type": "fragment"
|
1105 |
}
|
1106 |
},
|
1107 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1108 |
"source": [
|
1109 |
"import torch\n",
|
1110 |
"from transformers import AutoModelForCausalLM, AutoTokenizer\n",
|
1111 |
"\n",
|
1112 |
-
"
|
1113 |
-
"
|
1114 |
-
"model = AutoModelForCausalLM.from_pretrained(BASE_MODEL)\n",
|
1115 |
"\n",
|
1116 |
"torch.manual_seed(42) # To ensure determinism\n",
|
1117 |
"\n",
|
261 |
},
|
262 |
{
|
263 |
"cell_type": "code",
|
264 |
+
"execution_count": 1,
|
265 |
"id": "0e0d53be",
|
266 |
"metadata": {
|
267 |
"slideshow": {
|
330 |
},
|
331 |
{
|
332 |
"cell_type": "code",
|
333 |
+
"execution_count": 2,
|
334 |
"id": "c1227c49",
|
335 |
"metadata": {
|
336 |
"slideshow": {
|
369 |
},
|
370 |
{
|
371 |
"cell_type": "code",
|
372 |
+
"execution_count": 3,
|
373 |
"id": "11bec6de",
|
374 |
"metadata": {
|
375 |
"slideshow": {
|
376 |
"slide_type": "fragment"
|
377 |
}
|
378 |
},
|
379 |
+
"outputs": [
|
380 |
+
{
|
381 |
+
"name": "stdout",
|
382 |
+
"output_type": "stream",
|
383 |
+
"text": [
|
384 |
+
"La siguiente es una conversación entre un HUMANO y un bot EXPERTO en software libre.\n",
|
385 |
+
"El EXPERTO le ayuda al HUMANO con preguntas acerca de software libre.\n",
|
386 |
+
"El EXPERTO es conversacional, optimista, flexible, creativo y genera respuestas parecidas a un humano.\n",
|
387 |
+
"\n",
|
388 |
+
"HUMANO: Hola, ¿Cómo estás?\n",
|
389 |
+
"EXPERTO: Hola, pmuy bien. Estoy acá para ayudarte con preguntas respecto al software libre.\n",
|
390 |
+
"\n",
|
391 |
+
"HUMANO: ¿Qué es el software libre?\n",
|
392 |
+
"EXPERTO: El software libre es un software que se puede modificar, redistribuir y distribuir libremente.\n",
|
393 |
+
"HUMANO: ¿En qué consiste la licencia GPL?\n",
|
394 |
+
"EXPERTO: La licencia GPL es una licencia de software libre que permite a los usuarios modificar, redistribuir\n"
|
395 |
+
]
|
396 |
+
}
|
397 |
+
],
|
398 |
"source": [
|
399 |
"MAX_TOKENS = 50\n",
|
400 |
"input_ids = tokenizer.encode(PROMPT, return_tensors='pt')\n",
|
424 |
},
|
425 |
{
|
426 |
"cell_type": "code",
|
427 |
+
"execution_count": 4,
|
428 |
"id": "dc66f288",
|
429 |
"metadata": {
|
430 |
"slideshow": {
|
431 |
"slide_type": "fragment"
|
432 |
}
|
433 |
},
|
434 |
+
"outputs": [
|
435 |
+
{
|
436 |
+
"name": "stdout",
|
437 |
+
"output_type": "stream",
|
438 |
+
"text": [
|
439 |
+
"La siguiente es una conversación entre un HUMANO y un bot EXPERTO en software libre.\n",
|
440 |
+
"El EXPERTO le ayuda al HUMANO con preguntas acerca de software libre.\n",
|
441 |
+
"El EXPERTO es conversacional, optimista, flexible, creativo y genera respuestas parecidas a un humano.\n",
|
442 |
+
"\n",
|
443 |
+
"HUMANO: Hola, ¿Cómo estás?\n",
|
444 |
+
"EXPERTO: Hola, pmuy bien. Estoy acá para ayudarte con preguntas respecto al software libre.\n",
|
445 |
+
"\n",
|
446 |
+
"HUMANO: ¿Qué es el software libre?\n",
|
447 |
+
"EXPERTO: El software libre, es aquel software que esta escrito en un lenguaje de programación que puede ser modificado y copiado por cualquier persona o entidad.\n",
|
448 |
+
"\n",
|
449 |
+
"HUMANO: ¿En general cuáles son los usos que se pueden dar a un software libre?\n",
|
450 |
+
"EXPERTO\n"
|
451 |
+
]
|
452 |
+
}
|
453 |
+
],
|
454 |
"source": [
|
455 |
"torch.manual_seed(42) # To ensure determinism\n",
|
456 |
"\n",
|
510 |
"\"\"\".strip()\n",
|
511 |
"\n",
|
512 |
"chatbot = ChatBot(\n",
|
513 |
+
" base_model='bigscience/bloom-3b',\n",
|
|
|
514 |
" initial_prompt=PROMPT,\n",
|
515 |
" keep_context=True,\n",
|
516 |
" creative=True,\n",
|
633 |
}
|
634 |
},
|
635 |
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
636 |
{
|
637 |
"name": "stdout",
|
638 |
"output_type": "stream",
|
682 |
}
|
683 |
},
|
684 |
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
685 |
{
|
686 |
"data": {
|
687 |
"application/vnd.jupyter.widget-view+json": {
|
688 |
+
"model_id": "123690f207a94d3e850acef7a13133a6",
|
689 |
"version_major": 2,
|
690 |
"version_minor": 0
|
691 |
},
|
745 |
"slide_type": "fragment"
|
746 |
}
|
747 |
},
|
748 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
749 |
"source": [
|
750 |
"from utils import tokenize # local module in the repository\n",
|
751 |
"\n",
|
781 |
"slide_type": "fragment"
|
782 |
}
|
783 |
},
|
784 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
785 |
"source": [
|
786 |
"from functools import partial\n",
|
787 |
"from utils import group_texts # local module in the repository\n",
|
906 |
},
|
907 |
"outputs": [
|
908 |
{
|
909 |
+
"data": {
|
910 |
+
"application/vnd.jupyter.widget-view+json": {
|
911 |
+
"model_id": "94b41ffd721d4bbf8840df3fee46bbb2",
|
912 |
+
"version_major": 2,
|
913 |
+
"version_minor": 0
|
914 |
+
},
|
915 |
+
"text/plain": [
|
916 |
+
"VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
|
917 |
+
]
|
918 |
+
},
|
919 |
+
"metadata": {},
|
920 |
+
"output_type": "display_data"
|
921 |
}
|
922 |
],
|
923 |
"source": [
|
954 |
}
|
955 |
},
|
956 |
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
957 |
{
|
958 |
"data": {
|
959 |
"text/html": [
|
1031 |
},
|
1032 |
"metadata": {},
|
1033 |
"output_type": "display_data"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1034 |
}
|
1035 |
],
|
1036 |
"source": [
|
1052 |
" eval_dataset=lm_datasets[\"validation\"]\n",
|
1053 |
")\n",
|
1054 |
"\n",
|
1055 |
+
"trainer.train()\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1056 |
"trainer.push_to_hub() # This pushes the trained model to Hugging Face model repository"
|
1057 |
]
|
1058 |
},
|
1074 |
},
|
1075 |
{
|
1076 |
"cell_type": "code",
|
1077 |
+
"execution_count": 1,
|
1078 |
"id": "6a35e80f",
|
1079 |
"metadata": {
|
1080 |
"slideshow": {
|
1081 |
"slide_type": "fragment"
|
1082 |
}
|
1083 |
},
|
1084 |
+
"outputs": [
|
1085 |
+
{
|
1086 |
+
"name": "stdout",
|
1087 |
+
"output_type": "stream",
|
1088 |
+
"text": [
|
1089 |
+
"Aquí me pongo a cantar;\n",
|
1090 |
+
"y si tengo el sueño:\n",
|
1091 |
+
"de pronto se me ha quedado la sangre:\n",
|
1092 |
+
"como te asombre se me\n",
|
1093 |
+
"lo oí decir muchas veces,\n",
|
1094 |
+
"pero el tiempo me ha borrado.\n",
|
1095 |
+
"\n",
|
1096 |
+
"2\n",
|
1097 |
+
"Soy\n"
|
1098 |
+
]
|
1099 |
+
}
|
1100 |
+
],
|
1101 |
"source": [
|
1102 |
"import torch\n",
|
1103 |
"from transformers import AutoModelForCausalLM, AutoTokenizer\n",
|
1104 |
"\n",
|
1105 |
+
"tokenizer = AutoTokenizer.from_pretrained(\"DeepESP/gpt2-spanish\")\n",
|
1106 |
+
"model = AutoModelForCausalLM.from_pretrained(\"crscardellino/flisol-cba-martin-fierro\")\n",
|
|
|
1107 |
"\n",
|
1108 |
"torch.manual_seed(42) # To ensure determinism\n",
|
1109 |
"\n",
|