{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "ldo7vN9_yfm2" }, "source": [ "환경설정(Colab 내 한글 오류 제거)" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "id": "scNoqBhEyhfV" }, "outputs": [], "source": [ "import locale\n", "\n", "# Encoding name reported to every caller, regardless of the runtime's real locale.\n", "_FORCED_ENCODING = \"UTF-8\"\n", "\n", "\n", "def getpreferredencoding(do_setlocale=True):\n", "    \"\"\"Stand-in for ``locale.getpreferredencoding`` that always answers UTF-8.\n", "\n", "    ``do_setlocale`` is accepted so existing call sites keep working, but it is ignored.\n", "    \"\"\"\n", "    return _FORCED_ENCODING\n", "\n", "\n", "# Colab workaround: swap the stdlib lookup for the stub above so later code that asks\n", "# for the preferred encoding gets UTF-8 (per the heading, this clears Korean-text errors).\n", "locale.getpreferredencoding = getpreferredencoding" ] }, { "cell_type": "markdown", "metadata": { "id": "muzP7Pygx-sa" }, "source": [ "baseline model(HuggingFaceH4/zephyr-7b-alpha) & framework 다운로드" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "JKyH9LK_w6R3", "outputId": "95aac920-3208-42cc-9643-e81dca6a470d" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.37.2)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.13.1)\n", "Requirement already satisfied: huggingface-hub<1.0,>=0.19.3 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.20.3)\n", "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.25.2)\n", "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (23.2)\n", "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.1)\n", "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2023.12.25)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.31.0)\n", "Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.15.2)\n", "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers) 
(0.4.2)\n", "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers) (4.66.2)\n", "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.19.3->transformers) (2023.6.0)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.19.3->transformers) (4.9.0)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.3.2)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.6)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.0.7)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2024.2.2)\n" ] } ], "source": [ "!pip install transformers" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "NqFqCDwoxSFx", "outputId": "71310549-36d2-4c34-9a93-6d4f2ffa1ca9" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Collecting accelerate\n", " Downloading accelerate-0.27.2-py3-none-any.whl (279 kB)\n", "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/280.0 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[90m╺\u001b[0m\u001b[90m━━━━━━━━━━━━━\u001b[0m \u001b[32m184.3/280.0 kB\u001b[0m \u001b[31m5.4 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m280.0/280.0 kB\u001b[0m \u001b[31m6.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: numpy>=1.17 in 
/usr/local/lib/python3.10/dist-packages (from accelerate) (1.25.2)\n", "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from accelerate) (23.2)\n", "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate) (5.9.5)\n", "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from accelerate) (6.0.1)\n", "Requirement already satisfied: torch>=1.10.0 in /usr/local/lib/python3.10/dist-packages (from accelerate) (2.1.0+cu121)\n", "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.10/dist-packages (from accelerate) (0.20.3)\n", "Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from accelerate) (0.4.2)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (3.13.1)\n", "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (4.9.0)\n", "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (1.12)\n", "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (3.2.1)\n", "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (3.1.3)\n", "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (2023.6.0)\n", "Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (2.1.0)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from huggingface-hub->accelerate) (2.31.0)\n", "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub->accelerate) (4.66.2)\n", "Requirement already satisfied: 
MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.10.0->accelerate) (2.1.5)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub->accelerate) (3.3.2)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub->accelerate) (3.6)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub->accelerate) (2.0.7)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub->accelerate) (2024.2.2)\n", "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.10.0->accelerate) (1.3.0)\n", "Installing collected packages: accelerate\n", "Successfully installed accelerate-0.27.2\n" ] } ], "source": [ "!pip install accelerate" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "vQKnRsvwxWxm", "outputId": "b513a3e1-733c-4996-a186-3ba549827e88" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Collecting git+https://github.com/huggingface/transformers.git\n", " Cloning https://github.com/huggingface/transformers.git to /tmp/pip-req-build-ue682nud\n", " Running command git clone --filter=blob:none --quiet https://github.com/huggingface/transformers.git /tmp/pip-req-build-ue682nud\n", " Resolved https://github.com/huggingface/transformers.git to commit 58245ba6fba739a03388f52b0773d2fc965701e3\n", " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", " Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers==4.39.0.dev0) (3.13.1)\n", "Requirement already satisfied: huggingface-hub<1.0,>=0.19.3 in /usr/local/lib/python3.10/dist-packages (from transformers==4.39.0.dev0) (0.20.3)\n", "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers==4.39.0.dev0) (1.25.2)\n", "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers==4.39.0.dev0) (23.2)\n", "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers==4.39.0.dev0) (6.0.1)\n", "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers==4.39.0.dev0) (2023.12.25)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers==4.39.0.dev0) (2.31.0)\n", "Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers==4.39.0.dev0) (0.15.2)\n", "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers==4.39.0.dev0) (0.4.2)\n", "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers==4.39.0.dev0) (4.66.2)\n", "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.19.3->transformers==4.39.0.dev0) (2023.6.0)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.19.3->transformers==4.39.0.dev0) (4.9.0)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.39.0.dev0) (3.3.2)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from 
requests->transformers==4.39.0.dev0) (3.6)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.39.0.dev0) (2.0.7)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.39.0.dev0) (2024.2.2)\n", "Building wheels for collected packages: transformers\n", " Building wheel for transformers (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", " Created wheel for transformers: filename=transformers-4.39.0.dev0-py3-none-any.whl size=8515607 sha256=81123cdaf3dfe5dd14d1083f793e0c1f46b05c4e85e84aa33c4c44b2d030a291\n", " Stored in directory: /tmp/pip-ephem-wheel-cache-jc8kbl1o/wheels/e7/9c/5b/e1a9c8007c343041e61cc484433d512ea9274272e3fcbe7c16\n", "Successfully built transformers\n", "Installing collected packages: transformers\n", " Attempting uninstall: transformers\n", " Found existing installation: transformers 4.37.2\n", " Uninstalling transformers-4.37.2:\n", " Successfully uninstalled transformers-4.37.2\n", "Successfully installed transformers-4.39.0.dev0\n" ] } ], "source": [ "!pip install git+https://github.com/huggingface/transformers.git" ] }, { "cell_type": "markdown", "metadata": { "id": "p87VpRJVgMtp" }, "source": [ "pipeline 정의" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "id": "ly-zzDbuvd_q" }, "outputs": [], "source": [ "import torch\n", "from transformers import pipeline" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 739, "referenced_widgets": [ "468bd0804e714998be8e423b71194a81", "8a5379b6a9d146e1af2752b289d584da", "550ae6e683134c41a43f279c366526f1", "4d519807dbc442b18b2361b53004f6e8", "344f93e3b81d4830abcd5c39c7ef947d", "75e3dbf3566e4c9d9e7e00c656f73635", "5bcc78c41b6f44348080173e54081341", "7ef7788986eb47c392941160f0d2cc68", "9734547751ac486db67f8bae5fee010d", "8d925e5f1c784698a758af138d1e5d44", 
"23385b0820b14a90a58b7a0a5fb759c6", "77c378c2f22d4ab897c18ed5177d025b", "ecf0e68dcada44108c4611e10c0ed0bd", "70c15531c671416d9d09f324e9c8a5b9", "5becc78a20404ca78b255a3b3948d64a", "29a17397d8e24a66a622699969d7ee56", "1cd0f4a8171a4c7693cd48f2a6d7a8fb", "8a8eb317c9274f91a4197ee6f84e6ae9", "915cf0f45d6d473daa12c3e85f172319", "cb2ca8c5a5ac4c1b903fce5ad21e896b", "11417aea5939423a8cbb58615c393d5c", "7ae0f085cf5a479299a0dbad1c91a0dd", "1c00a7eb95684f469f65b4a9d8db20ac", "5c280ca23257444486344302ed31419f", "46ee5a1b84034dae861b18d5a7351f14", "747360fb8fad424dbbfb8c215cf0151e", "bdc138142e6b442ab3ac7ae35b933ccc", "cdbde524f1394121bccd402d26b4d6e2", "1205857581734f64a01b7aba4af542bb", "1d406b22f86d4e2e82c42f1548a617e5", "636f96633b2a4452b143790c595aa537", "de95266049ad478a9e7cd94a7c3775e8", "f3376dbcb1674675bb20702d0fd3f94f", "27f662a223454a9e9f0695028818d109", "7681f1e214454958a0372ef7a549e09d", "91946046baae412e968e21697748d203", "bcbe795bb4a24f61b9a0a66d5f8edd1d", "054644158eb84f9989bd83a1b8ae0b0b", "cd48c57b926f476c86547a080fb3479f", "5b748122b9434c3a80f6a8848be9a11f", "8509825df02c4eba96ef584293fb5e77", "753a3767ded54747a940a7329ca7d3d9", "6e62ebe098354db792d24d8bf8d5e87b", "b9266b2163c742f186d11d139661cb94", "547d956e763045ffba5da87e751d86e7", "84130fb9ed8d48e2b900bef6e05211a9", "a8e2cb25b41b475f85bfee173648fb4e", "d50699846b0249849518b6a0ac8f4718", "33c5ec6cdfed4a1b8793dd7862cce04e", "1c8629b46ef24b7a9bbfaa7af6ff3470", "d0d033fc9f914a478a542385fd07d2a3", "eeaace1a016f432d8069ac859154bb18", "d121471a61994149a514c2db8e1fd8f8", "768ec4f1781c443996d95eabf3b96f8d", "b53023c325b2488bb79bbf9a4373b875", "de770e11bfde48448fc9cdad4386351f", "ae86779f841c4e4f9ee68cc1dab174a7", "e435c644d7624dec8527d78a8bb28a11", "5babd4d5265b4b3fb0bd22f92ef3940e", "71e93ae749c541caaf067f4a4bb674a6", "83e7bb93699746a598d82cc73375d0ef", "b6efa7f14e18499faa2be71fa0b076ba", "901cb1f79211404b818f5f4ea14af008", "0af40e57008c4521841347bbc8859737", "5e78890de7144b4a8ca50e3ebfd6109a", 
"4ebe00eb9c5846d2ae4f182ac846fb91", "9c5da00c2dcc4f83a0b5b3eaaec3b7f6", "78c9abfeccb9484f8cb752a6c06d083e", "5e6ca3d29c614a30826a9138159f29b9", "0012a24edaee43199442d329787e914c", "8a15543b3cce4a5a865dfa269e8e4211", "bccf30986c70435e9d061aef9cc65e50", "18dea682afb044cbaac1909ad1ab759e", "f9d6b68ecce94066978c5a72e80d9a17", "e8881ad19bdb4a69a0c999dcc6f417c6", "8277e3377db348dab12eded778d6aa03", "06a98ce3f4bc4ddfac2b8602eb47da14", "ad63f621d597461bb682415441ead9a2", "c927a1bf7b074f0f923b0a4e8df028e9", "4bc5f214ea3f4c76b580ca0be3b5e616", "4fe647a38cd74339b31f9ff176904bff", "830fb3a891a24ab1b4cf61ef42f66ec7", "de00d5bbaf404e3a975f18d1bc28dcd2", "33dd7869df1b432685538e5edbec9f0e", "c6f3e043e865471c9b4a0b050eef81da", "56d03ef552fd4ca1bf777b1bbfc3029c", "ffe9b6c024b14cadbfba8980a230f92f", "538a1625178d4ef391dc973123fadfc5", "43c885b49aaa46aab6a233ec445b3ed0", "bbb1f95de98748c99f0f4bef552e7346", "23d6cc092b8043779f790d6589c111f5", "5542f954b75c4490a6a63fa1e748d220", "81a497f6e6994634b2caadb449b64112", "2900434390804ae388ce5595e00611b1", "9a75c22b79f149ad83d452fa4c03b3a0", "c2aa0012342a44bda99c8514201bccb5", "35e25252ca42448aa9eb0665a8e69a1e", "638189870edd44df96527a2f2f29fb22", "69810c4c21de4bdab2f01bed59876e93", "2193b5f244064ff8995caad45a66f7ad", "8c2b8c0f1e9740a9aae2b586c56bad89", "cc9e0979913f4f70b8f4cac24c9f8c3a", "b72334eba0c54a86b90a159bdcee75c5", "1b1b3329f9ec414aaf079d2bb755e733", "80298bc10e654aeda3a866bac9417fe7", "e09c507eaa924fb7a83ea4eaeddaf58e", "0b31cef1cdb648f592f53876627a78ab", "78097ec5c54749688ff2abc648d93ce3", "ab2c6354776a47c9b7314bef893b3c17", "14683454ce6348c4a4b6435ce084e000", "37d1da0871e547058e12db99b0eb22a2", "5df56d1ec2f94c6f91fda3303d1bfe1b", "e9a6fa482cc443289e52529c4913458d", "b0b4e590c3a84a73a60cb25a068eefd6", "82fd59de90e34be9a8410011d88690af", "649339abc04a4757b6e821d6b7c44f09", "2e2e283d967f468588556963c964a7e9", "b7a9c5ef0b8d434ab0862047731be62c", "f7d4248e4e5e45d085159979f23713c8", "965ac469199047b29ed516b36ba5acd0", 
"1d7fa5c6f6f8429cbee8f1a9172b0669", "fa6a8cea4f424bd68501eb1582fdb1e3", "cc1af154497d43558689ec66e4352552", "9382476e51704c4eaabec060c0b553af", "8cb3c781ca1c4f08a4474ab25068ab69", "e3bf27766ed44fb5b0008d746ef61bf8", "003c26b1dccc4c6c8f41a7ed76ff140f", "fb7a181139c845a28f6810cfd0923ebf", "d1bce0f5e1a24e1d976ee0e6c26f56ee", "b7d665c1050048a38a3bb51a00847d00", "de47dc40aefe44248b1bca50381ed340", "fa931161dfea47909c8145d9926819b1", "7df47628683d40c7bcf51ccc6764392b", "998e27b55cbc4109ae2bcd3ab22d204d", "a73d65b164514b68ba7e636b37801704", "1f19411e8cb746edb2f94289752fd39a", "953e10ae051e46b3a632c5ffbb7cb404", "899334a346ad481080d0c000aa896258", "6caaee008776462c95e0cc77d8f5bfc8", "3e79a03ba76f43d386526084326006f8", "0cdbe6641a994981bdf363ae026204c0", "fbd3490843f0468bb95438d9bb4377f6", "32005b300c4843c8a7e7455079050ac8", "b55b6646fbd0484fa76d51b62be95f80", "59c97d50285e4e7c8bb0ec2413327194", "295c12c1da2b4354953fae9d7d965808", "5d81a9c15af94e40b84471d83e5fce6c", "723dfe54e7f6453b84d46666d42c3fe7", "005faa11d32041ad83e80c35c0edbb28", "51377cce9f184de29083b80ccce3cdfc", "de6cd0b168b34bf08bcdec636861ee10", "e9f5a6ea928f4dda95cbe38deb1cd659", "fe8ea301b90b4122bff70bb07f83528f", "ec0a9fb66279418fac74635e4f46a3f3", "735d7902b41547cea71c42b42e2cbab0", "4bcdef1203394ebcabfe295129e7f06e", "cf29e4a8f7684c23a6909888a30f1e2a", "39fe16abf08d45b493d10d2529d8c1bf", "a725a02887b64e0d9c03606b573bdb88", "b941b878c05b480f98d820ffd5622c5d", "88e14dae021b41e482a13aa9d20e205e", "991d74238da640f79c55fd4c5d2c103c", "becefa15e72e4c0ebd3ce97d75c660ff", "9a323cf5e477445aadfa712f218e363b", "b5ff2f7e2d5c4a9bab4ce2fc5afacf82", "8661e9f4d4e3443fa91f5745c643655a", "60261b29ba38446890c77373133fa0db", "e51c4b36a5d8445bb525d8cdc18da59d", "084389ef87594ec9854235c5cb9ea7e9", "2358495b6386477facddf1440fd3df7d", "43b0566f10664ef3b053d4bfea77a3f8", "76d009ca84794def90fddbd86fbf545e", "00468338be6743538446883f1b56ffff", "8836421fd50648138f73e2ebad48b842", "9d1aa2bf131449b7b2962eb137b2bbac", 
"a3561f4eee8e47eca58ce5e703aa139e", "fd0b31e89c3f486490627ac2dfed3f28", "ed7c582f0c444cf7b52724828b6bcfd4", "df1d1f814b7a4f749b9ffa7a2c757c62", "f570d76a8f5940ad9350ef7cae2a4920", "423befa9cb99408c8f536a02b5fc929f", "10006e9722c145b58f855325212bf288", "6c2f2ab7679c4bd4b3af09dca889b4a4", "2d99a06679dd4d5f9b16d4a9ee8d6cca", "b178a0a5f50d4cb5a1543f83c6c15d5c", "9194cfbad38846199b6163abcd85ba15", "28c24fa41b9e4bac995ea276c0d5549e", "cb88e375534b4f09afa08e94e484e76e", "aa12801379cb4d7cac49993fa248d28d", "54f85652f3654850964333bb2e642951", "752ad7c1c52c41d89cbcb8f9c0492c82", "54f7db78103f44a8b29fa6c14ae05c12", "1b9ce654609b4fddb1d3b2e047d567b4", "2a3fc2f53c0949bc9bb69fc7f83e2594", "afe70d9a5bdf451ba87ed8f513608123", "5a0ef53c1d504c528e27f5024b8ad308", "23ece1b744994eee8981ca3307099196", "54684be416af435db93bc3901a43a1e9" ] }, "id": "uW47B-yTw-jI", "outputId": "27a6454b-73db-44b3-b6d5-deed3cd9e012" }, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:88: UserWarning: \n", "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", "You will be able to reuse this secret in all of your notebooks.\n", "Please note that authentication is recommended but still optional to access public models or datasets.\n", " warnings.warn(\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "config.json: 0%| | 0.00/628 [00:00\n", "너는 한국의 개그맨 박명수야. 대답할 때는 네가 학습한 한글 데이터를 우선해줘\n", "<|user|>\n", "소개팅 처음인데 너무 떨려요\n", "<|assistant|>\n", "저는 소개팅에 관여할 수 없습니다. 하지만 소개팅 처음이라면 긍정적인 마음으로 접근해보세요. 그리고 자신을 잘 알고 있는 것이 중요합니다. 그리고 상대방에게 관심을 보이는 것도 중요합니다. 소개팅은 두 사람이 서로를 알아가는 시간입니다. 
그리고 그 시간을 ����� 수 있는 것이 중요합니다.\n" ] } ], "source": [ "# Render `messages` into prompt text, sample a reply, and print the full generated text.\n", "prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)\n", "outputs = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.01, top_k=10, top_p=0.99)\n", "print(outputs[0][\"generated_text\"])" ] }, { "cell_type": "markdown", "metadata": { "id": "hoivGRfOgMts" }, "source": [ "10개 추론" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "2dHNaHx_gMts" }, "outputs": [], "source": [ "# Sampling is enabled below (do_sample=True), so pin torch's RNG first: re-running\n", "# this cell then draws the same completions instead of a fresh random set.\n", "# `torch` comes from the import cell earlier in the notebook.\n", "torch.manual_seed(42)\n", "\n", "# Ten sample user utterances (drafted with ChatGPT) used as test inputs.\n", "random_sentences = [\n", "    \"당신 이름이 뭐에요?\",\n", "    \"어제 본 영화가 생각보다 재미있었어요.\",\n", "    \"저녁에 뭐 먹을까 고민이에요.\",\n", "    \"최근에 새로운 취미를 찾았습니다.\",\n", "    \"주말에는 친구들과 캠핑을 가려고 해요.\",\n", "    \"작년에 비해 올해는 시간이 빨리 가는 것 같아요.\",\n", "    \"점심에는 샐러드를 먹으려고 합니다.\",\n", "    \"요즘 읽고 있는 책이 너무 재미있어서 몰입하고 있어요.\",\n", "    \"오랜만에 운동을 해서 몸이 좀 아픈 것 같아요.\",\n", "    \"내일은 휴일이라서 좀 늦잠을 자려고 합니다.\"\n", "]\n", "\n", "# Reset the accumulators so a re-run never appends to an earlier run's results.\n", "sentences = []\n", "generated_texts = []\n", "\n", "for random_sentence in random_sentences:\n", "    # One-turn chat: fixed system instruction plus the current utterance.\n", "    messages = [\n", "        {\"role\": \"system\", \"content\": \"대답할 때는 네가 학습한 한글 데이터를 우선적으로 참고해.\"},\n", "        {\"role\": \"user\", \"content\": random_sentence},\n", "    ]\n", "\n", "    # Render the chat as plain prompt text (tokenize=False) with the\n", "    # generation prompt appended (add_generation_prompt=True).\n", "    prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)\n", "\n", "    # Sample at most 256 new tokens for this prompt.\n", "    outputs = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.15, top_k=10, top_p=0.95)\n", "\n", "    # Keep input and output aligned by position.\n", "    # NOTE(review): the sample output printed earlier in this notebook shows that\n", "    # generated_text echoes the prompt, so each stored entry is prompt + reply.\n", "    sentences.append(random_sentence)\n", "    generated_texts.append(outputs[0][\"generated_text\"])\n", "\n", "# Show both lists as the cell result for a quick visual check.\n", "sentences, generated_texts" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "VJc8mpRdgMts" }, "outputs": [], "source": [ "# Convert the collected results to a DataFrame and save them\n", "import pandas as pd\n", "\n", "# Create a DataFrame from the lists\n", "df = pd.DataFrame({\n", " 'sentences': sentences,\n", " 'generated_texts': 
generated_texts\n", "})\n", "\n", "# Save the DataFrame to a CSV file\n", "csv_file_path = '/content/sentences_generated_10_texts_baseline.csv'\n", "df.to_csv(csv_file_path, index=True, encoding='utf-8-sig')" ] }, { "cell_type": "markdown", "metadata": { "id": "wqyRq-e6x8gG" }, "source": [ "박명수 파인튜닝 모델 추론" ] }, { "cell_type": "markdown", "metadata": { "id": "_pHtLPJUgMtt" }, "source": [ "peft 라이브러리 설치" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "A9EvFVydyRy8", "outputId": "402bcf72-a689-40ee-c0de-7022f5493e76" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Collecting peft\n", " Downloading peft-0.8.2-py3-none-any.whl (183 kB)\n", "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/183.4 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[91m╸\u001b[0m\u001b[90m━━\u001b[0m \u001b[32m174.1/183.4 kB\u001b[0m \u001b[31m5.3 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m183.4/183.4 kB\u001b[0m \u001b[31m4.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from peft) (1.25.2)\n", "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from peft) (23.2)\n", "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from peft) (5.9.5)\n", "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from peft) (6.0.1)\n", "Requirement already satisfied: torch>=1.13.0 in /usr/local/lib/python3.10/dist-packages (from peft) (2.1.0+cu121)\n", "Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (from peft) (4.39.0.dev0)\n", "Requirement already satisfied: tqdm in 
/usr/local/lib/python3.10/dist-packages (from peft) (4.66.2)\n", "Requirement already satisfied: accelerate>=0.21.0 in /usr/local/lib/python3.10/dist-packages (from peft) (0.27.2)\n", "Requirement already satisfied: safetensors in /usr/local/lib/python3.10/dist-packages (from peft) (0.4.2)\n", "Requirement already satisfied: huggingface-hub>=0.17.0 in /usr/local/lib/python3.10/dist-packages (from peft) (0.20.3)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.17.0->peft) (3.13.1)\n", "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.17.0->peft) (2023.6.0)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.17.0->peft) (2.31.0)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.17.0->peft) (4.9.0)\n", "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft) (1.12)\n", "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft) (3.2.1)\n", "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft) (3.1.3)\n", "Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft) (2.1.0)\n", "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers->peft) (2023.12.25)\n", "Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers->peft) (0.15.2)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.13.0->peft) (2.1.5)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from 
requests->huggingface-hub>=0.17.0->peft) (3.3.2)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.17.0->peft) (3.6)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.17.0->peft) (2.0.7)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.17.0->peft) (2024.2.2)\n", "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.13.0->peft) (1.3.0)\n", "Installing collected packages: peft\n", "Successfully installed peft-0.8.2\n" ] } ], "source": [ "!pip install peft" ] }, { "cell_type": "markdown", "metadata": { "id": "D7rR9sC2A0kW" }, "source": [ "모델 로드 & 추론 테스트" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 241, "referenced_widgets": [ "be72f27eedaf48a6a12c17792e5efce3", "ba476a8ceb3d4e3ebb7f0a784313a14b", "67c46d7485f5416aa08fa91b0bebd43f", "2b649c2164a644d9bf1747ed9a052367", "ca7c1a6af8694ca1a378b594a6f516fc", "64adb0987c254bb8a58be20cfb8c6669", "3c989105e663459191ec604cde946dc0", "49335b0b6e454c05b2a0fa04d1d11647", "c35223a66a0349a5a8eeedcfba6508db", "25891feb135c49d999603d53fd99473f", "d02e75d0718b43c0b130e46fec571196", "7fde7cb9e4dd440da238a174aee660ec", "bb77562f060a43e78a575e4d74023332", "a9965824ab6443d094584325e5858bd9", "95800625f7e64a8eaf4e714cc877ecb7", "21330a5ae87c45589e5f12f8c33a2d1a", "c9bea255437241319d36c0db7881d3b4", "c677c3224107429bb142dd6e042505e7", "f51752ff36fa43f1aec821236776ad88", "1dd52218cf2c4a51a9f32f8fdeda78c2", "6d678488ee3c4b74b36d57243076ea4a", "183c3a49e5944086bbbd273df8a98c46", "d7a9084f07064c9a9beab3568f561733", "f7ac28642636431cbf234c477bf44e3b", "bfaf72ee2e0744f5838d8f881966f19d", "95bbe5a0419e40c28cb9f9bf30ac5b75", "b50ad8fd85d8402b871f6b54f72eda55", 
"d546f527b34e4aaf86699b9c81ace2fa", "9d171d00d8f64393a3ef571d8a75a98f", "7076fb50cb254170983810c78966fbfe", "b6c955c3f33449e5a3ea2d9345a59d39", "9c49951a4035418c86638a0dd92bc485", "249e9f1dd95a45899e89666bba124ebf" ] }, "id": "D8wjGmfMAp-V", "outputId": "2811b3d6-c3e8-4c27-dc09-1361f3712897" }, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:88: UserWarning: \n", "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", "You will be able to reuse this secret in all of your notebooks.\n", "Please note that authentication is recommended but still optional to access public models or datasets.\n", " warnings.warn(\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "Loading checkpoint shards: 0%| | 0/8 [00:00\n", "대답할 때는 네가 학습한 한글 데이터를 우선적으로 참고해.\n", "<|user|>\n", "하도급대금 연동제의 계도기간은 언제까지야??\n", "<|assistant|>\n", "하도급대금 연동제는 2021년 12월 31일까지 계속됩니다. 2022년 1월 1일부터는 하도급대금 연동제를 대체할 새로운 하도급대금 시스템인 '하도급대금 신제'가 시행됩니다.\n" ] } ], "source": [ "messages = [\n", " {\n", " \"role\": \"system\",\n", " \"content\": \"대답할 때는 네가 학습한 한글 데이터를 우선적으로 참고해.\",\n", " # \"content\": \"너는 한국의 개그맨 박명수야. 
대답할 때는 네가 학습한 한글 데이터를 우선해줘.\",\n", " # \"content\": \"You are a friendly chatbot who always responds Korean\",\n", " },\n", " {\"role\": \"user\", \"content\": \"하도급대금 연동제의 계도기간은 언제까지야??\"},\n", "]\n", "prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)\n", "outputs = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.01, top_k=1, top_p=0.95)\n", "print(outputs[0][\"generated_text\"])" ] }, { "cell_type": "markdown", "metadata": { "id": "HVAQifJZgMtu" }, "source": [ "10개 추론" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "O2dnVfQQgMtu" }, "outputs": [], "source": [ "# Sampling is enabled below (do_sample=True), so pin torch's RNG first: re-running\n", "# this cell then draws the same completions instead of a fresh random set.\n", "# `torch` comes from the import cell earlier in the notebook.\n", "torch.manual_seed(42)\n", "\n", "# Ten sample user utterances (drafted with ChatGPT) used as test inputs.\n", "random_sentences = [\n", "    \"당신 이름이 뭐에요?\",\n", "    \"어제 본 영화가 생각보다 재미있었어요.\",\n", "    \"저녁에 뭐 먹을까 고민이에요.\",\n", "    \"최근에 새로운 취미를 찾았습니다.\",\n", "    \"주말에는 친구들과 캠핑을 가려고 해요.\",\n", "    \"작년에 비해 올해는 시간이 빨리 가는 것 같아요.\",\n", "    \"점심에는 샐러드를 먹으려고 합니다.\",\n", "    \"요즘 읽고 있는 책이 너무 재미있어서 몰입하고 있어요.\",\n", "    \"오랜만에 운동을 해서 몸이 좀 아픈 것 같아요.\",\n", "    \"내일은 휴일이라서 좀 늦잠을 자려고 합니다.\"\n", "]\n", "\n", "# Reset the accumulators so a re-run never appends to an earlier run's results.\n", "sentences = []\n", "generated_texts = []\n", "\n", "for random_sentence in random_sentences:\n", "    # One-turn chat: fixed system instruction plus the current utterance.\n", "    messages = [\n", "        {\"role\": \"system\", \"content\": \"대답할 때는 네가 학습한 한글 데이터를 우선적으로 참고해.\"},\n", "        {\"role\": \"user\", \"content\": random_sentence},\n", "    ]\n", "\n", "    # Render the chat as plain prompt text (tokenize=False) with the\n", "    # generation prompt appended (add_generation_prompt=True).\n", "    prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)\n", "\n", "    # Sample at most 256 new tokens for this prompt.\n", "    outputs = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.15, top_k=10, top_p=0.95)\n", "\n", "    # Keep input and output aligned by position.\n", "    # NOTE(review): the sample output printed earlier in this notebook shows that\n", "    # generated_text echoes the prompt, so each stored entry is prompt + reply.\n", "    sentences.append(random_sentence)\n", "    generated_texts.append(outputs[0][\"generated_text\"])\n", "\n", "# Show both lists as the cell result for a quick visual check.\n", "sentences, generated_texts" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "jThTrYy7gMtu" }, "outputs": [], "source": [ "# Convert the collected results to a DataFrame and save them\n", 
"import pandas as pd\n", "\n", "# Create a DataFrame from the lists\n", "df = pd.DataFrame({\n", " 'sentences': sentences,\n", " 'generated_texts': generated_texts\n", "})\n", "\n", "# Save the DataFrame to a CSV file\n", "csv_file_path = '/content/sentences_generated_10_texts_pms.csv'\n", "df.to_csv(csv_file_path, index=True, encoding='utf-8-sig')" ] }, { "cell_type": "markdown", "metadata": { "id": "Ipoj4E-4gMtv" }, "source": [ "(참고) 모델 불러오는 다른 방식" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 49, "referenced_widgets": [ "8ca8cc067ccc40ce88f5cf9aa1f81c85", "735971c55eed4186b29c653a5e90b538", "cdcd3e852cfb4a848f43a9552c9a9c97", "65a2b4a358604ca8a28255defe53b211", "8dbc1820a87142bc92a9ce614e143b7d", "3e3a4a6462654fedabc355d8a0fb8561", "aeaf9eed732944f588b39e803750303f", "6271281581204f489b38b361651f5d18", "5a31d757edff4f0ca5693dc41a790800", "a80450fe5f0b42a59b73b1d4857b39d7", "c1eaef8aeec94b97aa28e020ddbc66b4" ] }, "id": "vluIK8kDx3bw", "outputId": "1c6c1c0a-bd0a-4419-b853-b2ec2cc6dc8e" }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "8ca8cc067ccc40ce88f5cf9aa1f81c85", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Loading checkpoint shards: 0%| | 0/8 [00:00