diff --git "a/zephyr-7b-beta-inc/inference_innocean_demo_240221(TG).ipynb" "b/zephyr-7b-beta-inc/inference_innocean_demo_240221(TG).ipynb" new file mode 100644--- /dev/null +++ "b/zephyr-7b-beta-inc/inference_innocean_demo_240221(TG).ipynb" @@ -0,0 +1,10266 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "ldo7vN9_yfm2" + }, + "source": [ + "환경설정(Colab 내 한글 오류 제거)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "scNoqBhEyhfV" + }, + "outputs": [], + "source": [ + "import locale\n", + "def getpreferredencoding(do_setlocale = True):\n", + " return \"UTF-8\"\n", + "locale.getpreferredencoding = getpreferredencoding" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "muzP7Pygx-sa" + }, + "source": [ + "baseline model(HuggingFaceH4/zephyr-7b-alpha) & framework 다운로드" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "JKyH9LK_w6R3", + "outputId": "95aac920-3208-42cc-9643-e81dca6a470d" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.37.2)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.13.1)\n", + "Requirement already satisfied: huggingface-hub<1.0,>=0.19.3 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.20.3)\n", + "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.25.2)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (23.2)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.1)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2023.12.25)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.31.0)\n", + "Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.15.2)\n", + "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.4.2)\n", + "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers) (4.66.2)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.19.3->transformers) (2023.6.0)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.19.3->transformers) (4.9.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.6)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.0.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2024.2.2)\n" + ] + } + ], + "source": [ + "!pip install transformers" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "NqFqCDwoxSFx", + "outputId": "71310549-36d2-4c34-9a93-6d4f2ffa1ca9" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting accelerate\n", + " Downloading accelerate-0.27.2-py3-none-any.whl (279 kB)\n", + "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/280.0 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[90m╺\u001b[0m\u001b[90m━━━━━━━━━━━━━\u001b[0m \u001b[32m184.3/280.0 kB\u001b[0m \u001b[31m5.4 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m280.0/280.0 kB\u001b[0m \u001b[31m6.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from accelerate) (1.25.2)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from accelerate) (23.2)\n", + "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate) (5.9.5)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from accelerate) (6.0.1)\n", + "Requirement already satisfied: torch>=1.10.0 in /usr/local/lib/python3.10/dist-packages (from accelerate) (2.1.0+cu121)\n", + "Requirement already satisfied: huggingface-hub in /usr/local/lib/python3.10/dist-packages (from accelerate) (0.20.3)\n", + "Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from accelerate) (0.4.2)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (3.13.1)\n", + "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (4.9.0)\n", + "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (1.12)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (3.2.1)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (3.1.3)\n", + "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (2023.6.0)\n", + "Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate) (2.1.0)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from huggingface-hub->accelerate) (2.31.0)\n", + "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub->accelerate) (4.66.2)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.10.0->accelerate) (2.1.5)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub->accelerate) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub->accelerate) (3.6)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub->accelerate) (2.0.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub->accelerate) (2024.2.2)\n", + "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.10.0->accelerate) (1.3.0)\n", + "Installing collected packages: accelerate\n", + "Successfully installed accelerate-0.27.2\n" + ] + } + ], + "source": [ + "!pip install accelerate" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "vQKnRsvwxWxm", + "outputId": "b513a3e1-733c-4996-a186-3ba549827e88" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting git+https://github.com/huggingface/transformers.git\n", + " Cloning https://github.com/huggingface/transformers.git to /tmp/pip-req-build-ue682nud\n", + " Running command git clone --filter=blob:none --quiet https://github.com/huggingface/transformers.git /tmp/pip-req-build-ue682nud\n", + " Resolved https://github.com/huggingface/transformers.git to commit 58245ba6fba739a03388f52b0773d2fc965701e3\n", + " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers==4.39.0.dev0) (3.13.1)\n", + "Requirement already satisfied: huggingface-hub<1.0,>=0.19.3 in /usr/local/lib/python3.10/dist-packages (from transformers==4.39.0.dev0) (0.20.3)\n", + "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers==4.39.0.dev0) (1.25.2)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers==4.39.0.dev0) (23.2)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers==4.39.0.dev0) (6.0.1)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers==4.39.0.dev0) (2023.12.25)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers==4.39.0.dev0) (2.31.0)\n", + "Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers==4.39.0.dev0) (0.15.2)\n", + "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers==4.39.0.dev0) (0.4.2)\n", + "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers==4.39.0.dev0) (4.66.2)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.19.3->transformers==4.39.0.dev0) (2023.6.0)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.19.3->transformers==4.39.0.dev0) (4.9.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.39.0.dev0) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.39.0.dev0) (3.6)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.39.0.dev0) (2.0.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.39.0.dev0) (2024.2.2)\n", + "Building wheels for collected packages: transformers\n", + " Building wheel for transformers (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for transformers: filename=transformers-4.39.0.dev0-py3-none-any.whl size=8515607 sha256=81123cdaf3dfe5dd14d1083f793e0c1f46b05c4e85e84aa33c4c44b2d030a291\n", + " Stored in directory: /tmp/pip-ephem-wheel-cache-jc8kbl1o/wheels/e7/9c/5b/e1a9c8007c343041e61cc484433d512ea9274272e3fcbe7c16\n", + "Successfully built transformers\n", + "Installing collected packages: transformers\n", + " Attempting uninstall: transformers\n", + " Found existing installation: transformers 4.37.2\n", + " Uninstalling transformers-4.37.2:\n", + " Successfully uninstalled transformers-4.37.2\n", + "Successfully installed transformers-4.39.0.dev0\n" + ] + } + ], + "source": [ + "!pip install git+https://github.com/huggingface/transformers.git" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "p87VpRJVgMtp" + }, + "source": [ + "pipeline 정의" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "ly-zzDbuvd_q" + }, + "outputs": [], + "source": [ + "import torch\n", + "from transformers import pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 739, + "referenced_widgets": [ + "468bd0804e714998be8e423b71194a81", + "8a5379b6a9d146e1af2752b289d584da", + "550ae6e683134c41a43f279c366526f1", + "4d519807dbc442b18b2361b53004f6e8", + "344f93e3b81d4830abcd5c39c7ef947d", + "75e3dbf3566e4c9d9e7e00c656f73635", + "5bcc78c41b6f44348080173e54081341", + "7ef7788986eb47c392941160f0d2cc68", + "9734547751ac486db67f8bae5fee010d", + "8d925e5f1c784698a758af138d1e5d44", + "23385b0820b14a90a58b7a0a5fb759c6", + "77c378c2f22d4ab897c18ed5177d025b", + "ecf0e68dcada44108c4611e10c0ed0bd", + "70c15531c671416d9d09f324e9c8a5b9", + "5becc78a20404ca78b255a3b3948d64a", + "29a17397d8e24a66a622699969d7ee56", + "1cd0f4a8171a4c7693cd48f2a6d7a8fb", + "8a8eb317c9274f91a4197ee6f84e6ae9", + "915cf0f45d6d473daa12c3e85f172319", + "cb2ca8c5a5ac4c1b903fce5ad21e896b", + "11417aea5939423a8cbb58615c393d5c", + "7ae0f085cf5a479299a0dbad1c91a0dd", + "1c00a7eb95684f469f65b4a9d8db20ac", + "5c280ca23257444486344302ed31419f", + "46ee5a1b84034dae861b18d5a7351f14", + "747360fb8fad424dbbfb8c215cf0151e", + "bdc138142e6b442ab3ac7ae35b933ccc", + "cdbde524f1394121bccd402d26b4d6e2", + "1205857581734f64a01b7aba4af542bb", + "1d406b22f86d4e2e82c42f1548a617e5", + "636f96633b2a4452b143790c595aa537", + "de95266049ad478a9e7cd94a7c3775e8", + "f3376dbcb1674675bb20702d0fd3f94f", + "27f662a223454a9e9f0695028818d109", + "7681f1e214454958a0372ef7a549e09d", + "91946046baae412e968e21697748d203", + "bcbe795bb4a24f61b9a0a66d5f8edd1d", + "054644158eb84f9989bd83a1b8ae0b0b", + "cd48c57b926f476c86547a080fb3479f", + "5b748122b9434c3a80f6a8848be9a11f", + "8509825df02c4eba96ef584293fb5e77", + "753a3767ded54747a940a7329ca7d3d9", + "6e62ebe098354db792d24d8bf8d5e87b", + "b9266b2163c742f186d11d139661cb94", + "547d956e763045ffba5da87e751d86e7", + "84130fb9ed8d48e2b900bef6e05211a9", + "a8e2cb25b41b475f85bfee173648fb4e", + "d50699846b0249849518b6a0ac8f4718", + "33c5ec6cdfed4a1b8793dd7862cce04e", + "1c8629b46ef24b7a9bbfaa7af6ff3470", + "d0d033fc9f914a478a542385fd07d2a3", + "eeaace1a016f432d8069ac859154bb18", + "d121471a61994149a514c2db8e1fd8f8", + "768ec4f1781c443996d95eabf3b96f8d", + "b53023c325b2488bb79bbf9a4373b875", + "de770e11bfde48448fc9cdad4386351f", + "ae86779f841c4e4f9ee68cc1dab174a7", + "e435c644d7624dec8527d78a8bb28a11", + "5babd4d5265b4b3fb0bd22f92ef3940e", + "71e93ae749c541caaf067f4a4bb674a6", + "83e7bb93699746a598d82cc73375d0ef", + "b6efa7f14e18499faa2be71fa0b076ba", + "901cb1f79211404b818f5f4ea14af008", + "0af40e57008c4521841347bbc8859737", + "5e78890de7144b4a8ca50e3ebfd6109a", + "4ebe00eb9c5846d2ae4f182ac846fb91", + "9c5da00c2dcc4f83a0b5b3eaaec3b7f6", + "78c9abfeccb9484f8cb752a6c06d083e", + "5e6ca3d29c614a30826a9138159f29b9", + "0012a24edaee43199442d329787e914c", + "8a15543b3cce4a5a865dfa269e8e4211", + "bccf30986c70435e9d061aef9cc65e50", + "18dea682afb044cbaac1909ad1ab759e", + "f9d6b68ecce94066978c5a72e80d9a17", + "e8881ad19bdb4a69a0c999dcc6f417c6", + "8277e3377db348dab12eded778d6aa03", + "06a98ce3f4bc4ddfac2b8602eb47da14", + "ad63f621d597461bb682415441ead9a2", + "c927a1bf7b074f0f923b0a4e8df028e9", + "4bc5f214ea3f4c76b580ca0be3b5e616", + "4fe647a38cd74339b31f9ff176904bff", + "830fb3a891a24ab1b4cf61ef42f66ec7", + "de00d5bbaf404e3a975f18d1bc28dcd2", + "33dd7869df1b432685538e5edbec9f0e", + "c6f3e043e865471c9b4a0b050eef81da", + "56d03ef552fd4ca1bf777b1bbfc3029c", + "ffe9b6c024b14cadbfba8980a230f92f", + "538a1625178d4ef391dc973123fadfc5", + "43c885b49aaa46aab6a233ec445b3ed0", + "bbb1f95de98748c99f0f4bef552e7346", + "23d6cc092b8043779f790d6589c111f5", + "5542f954b75c4490a6a63fa1e748d220", + "81a497f6e6994634b2caadb449b64112", + "2900434390804ae388ce5595e00611b1", + "9a75c22b79f149ad83d452fa4c03b3a0", + "c2aa0012342a44bda99c8514201bccb5", + "35e25252ca42448aa9eb0665a8e69a1e", + "638189870edd44df96527a2f2f29fb22", + "69810c4c21de4bdab2f01bed59876e93", + "2193b5f244064ff8995caad45a66f7ad", + "8c2b8c0f1e9740a9aae2b586c56bad89", + "cc9e0979913f4f70b8f4cac24c9f8c3a", + "b72334eba0c54a86b90a159bdcee75c5", + "1b1b3329f9ec414aaf079d2bb755e733", + "80298bc10e654aeda3a866bac9417fe7", + "e09c507eaa924fb7a83ea4eaeddaf58e", + "0b31cef1cdb648f592f53876627a78ab", + "78097ec5c54749688ff2abc648d93ce3", + "ab2c6354776a47c9b7314bef893b3c17", + "14683454ce6348c4a4b6435ce084e000", + "37d1da0871e547058e12db99b0eb22a2", + "5df56d1ec2f94c6f91fda3303d1bfe1b", + "e9a6fa482cc443289e52529c4913458d", + "b0b4e590c3a84a73a60cb25a068eefd6", + "82fd59de90e34be9a8410011d88690af", + "649339abc04a4757b6e821d6b7c44f09", + "2e2e283d967f468588556963c964a7e9", + "b7a9c5ef0b8d434ab0862047731be62c", + "f7d4248e4e5e45d085159979f23713c8", + "965ac469199047b29ed516b36ba5acd0", + "1d7fa5c6f6f8429cbee8f1a9172b0669", + "fa6a8cea4f424bd68501eb1582fdb1e3", + "cc1af154497d43558689ec66e4352552", + "9382476e51704c4eaabec060c0b553af", + "8cb3c781ca1c4f08a4474ab25068ab69", + "e3bf27766ed44fb5b0008d746ef61bf8", + "003c26b1dccc4c6c8f41a7ed76ff140f", + "fb7a181139c845a28f6810cfd0923ebf", + "d1bce0f5e1a24e1d976ee0e6c26f56ee", + "b7d665c1050048a38a3bb51a00847d00", + "de47dc40aefe44248b1bca50381ed340", + "fa931161dfea47909c8145d9926819b1", + "7df47628683d40c7bcf51ccc6764392b", + "998e27b55cbc4109ae2bcd3ab22d204d", + "a73d65b164514b68ba7e636b37801704", + "1f19411e8cb746edb2f94289752fd39a", + "953e10ae051e46b3a632c5ffbb7cb404", + "899334a346ad481080d0c000aa896258", + "6caaee008776462c95e0cc77d8f5bfc8", + "3e79a03ba76f43d386526084326006f8", + "0cdbe6641a994981bdf363ae026204c0", + "fbd3490843f0468bb95438d9bb4377f6", + "32005b300c4843c8a7e7455079050ac8", + "b55b6646fbd0484fa76d51b62be95f80", + "59c97d50285e4e7c8bb0ec2413327194", + "295c12c1da2b4354953fae9d7d965808", + "5d81a9c15af94e40b84471d83e5fce6c", + "723dfe54e7f6453b84d46666d42c3fe7", + "005faa11d32041ad83e80c35c0edbb28", + "51377cce9f184de29083b80ccce3cdfc", + "de6cd0b168b34bf08bcdec636861ee10", + "e9f5a6ea928f4dda95cbe38deb1cd659", + "fe8ea301b90b4122bff70bb07f83528f", + "ec0a9fb66279418fac74635e4f46a3f3", + "735d7902b41547cea71c42b42e2cbab0", + "4bcdef1203394ebcabfe295129e7f06e", + "cf29e4a8f7684c23a6909888a30f1e2a", + "39fe16abf08d45b493d10d2529d8c1bf", + "a725a02887b64e0d9c03606b573bdb88", + "b941b878c05b480f98d820ffd5622c5d", + "88e14dae021b41e482a13aa9d20e205e", + "991d74238da640f79c55fd4c5d2c103c", + "becefa15e72e4c0ebd3ce97d75c660ff", + "9a323cf5e477445aadfa712f218e363b", + "b5ff2f7e2d5c4a9bab4ce2fc5afacf82", + "8661e9f4d4e3443fa91f5745c643655a", + "60261b29ba38446890c77373133fa0db", + "e51c4b36a5d8445bb525d8cdc18da59d", + "084389ef87594ec9854235c5cb9ea7e9", + "2358495b6386477facddf1440fd3df7d", + "43b0566f10664ef3b053d4bfea77a3f8", + "76d009ca84794def90fddbd86fbf545e", + "00468338be6743538446883f1b56ffff", + "8836421fd50648138f73e2ebad48b842", + "9d1aa2bf131449b7b2962eb137b2bbac", + "a3561f4eee8e47eca58ce5e703aa139e", + "fd0b31e89c3f486490627ac2dfed3f28", + "ed7c582f0c444cf7b52724828b6bcfd4", + "df1d1f814b7a4f749b9ffa7a2c757c62", + "f570d76a8f5940ad9350ef7cae2a4920", + "423befa9cb99408c8f536a02b5fc929f", + "10006e9722c145b58f855325212bf288", + "6c2f2ab7679c4bd4b3af09dca889b4a4", + "2d99a06679dd4d5f9b16d4a9ee8d6cca", + "b178a0a5f50d4cb5a1543f83c6c15d5c", + "9194cfbad38846199b6163abcd85ba15", + "28c24fa41b9e4bac995ea276c0d5549e", + "cb88e375534b4f09afa08e94e484e76e", + "aa12801379cb4d7cac49993fa248d28d", + "54f85652f3654850964333bb2e642951", + "752ad7c1c52c41d89cbcb8f9c0492c82", + "54f7db78103f44a8b29fa6c14ae05c12", + "1b9ce654609b4fddb1d3b2e047d567b4", + "2a3fc2f53c0949bc9bb69fc7f83e2594", + "afe70d9a5bdf451ba87ed8f513608123", + "5a0ef53c1d504c528e27f5024b8ad308", + "23ece1b744994eee8981ca3307099196", + "54684be416af435db93bc3901a43a1e9" + ] + }, + "id": "uW47B-yTw-jI", + "outputId": "27a6454b-73db-44b3-b6d5-deed3cd9e012" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:88: UserWarning: \n", + "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", + "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", + "You will be able to reuse this secret in all of your notebooks.\n", + "Please note that authentication is recommended but still optional to access public models or datasets.\n", + " warnings.warn(\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "config.json: 0%| | 0.00/628 [00:00\n", + "너는 한국의 개그맨 박명수야. 대답할 때는 네가 학습한 한글 데이터를 우선해줘\n", + "<|user|>\n", + "소개팅 처음인데 너무 떨려요\n", + "<|assistant|>\n", + "저는 소개팅에 관여할 수 없습니다. 하지만 소개팅 처음이라면 긍정적인 마음으로 접근해보세요. 그리고 자신을 잘 알고 있는 것이 중요합니다. 그리고 상대방에게 관심을 보이는 것도 중요합니다. 소개팅은 두 사람이 서로를 알아가는 시간입니다. 그리고 그 시간을 ����� 수 있는 것이 중요합니다.\n" + ] + } + ], + "source": [ + "prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)\n", + "outputs = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.01, top_k=10, top_p=0.99)\n", + "print(outputs[0][\"generated_text\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hoivGRfOgMts" + }, + "source": [ + "10개 추론" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2dHNaHx_gMts" + }, + "outputs": [], + "source": [ + "sentences = []\n", + "generated_texts = []\n", + "\n", + "# 예시로 10문장 임의 생성(ChatGPT)\n", + "random_sentences = [\n", + " \"당신 이름이 뭐에요?\",\n", + " \"어제 본 영화가 생각보다 재미있었어요.\",\n", + " \"저녁에 뭐 먹을까 고민이에요.\",\n", + " \"최근에 새로운 취미를 찾았습니다.\",\n", + " \"주말에는 친구들과 캠핑을 가려고 해요.\",\n", + " \"작년에 비해 올해는 시간이 빨리 가는 것 같아요.\",\n", + " \"점심에는 샐러드를 먹으려고 합니다.\",\n", + " \"요즘 읽고 있는 책이 너무 재미있어서 몰입하고 있어요.\",\n", + " \"오랜만에 운동을 해서 몸이 좀 아픈 것 같아요.\",\n", + " \"내일은 휴일이라서 좀 늦잠을 자려고 합니다.\"\n", + "]\n", + "\n", + "for random_sentence in random_sentences:\n", + " # Update 'messages' list\n", + " messages = [\n", + " {\"role\": \"system\", \"content\": \"대답할 때는 네가 학습한 한글 데이터를 우선적으로 참고해.\"},\n", + " {\"role\": \"user\", \"content\": random_sentence},\n", + " ]\n", + "\n", + " # Create 'prompt'\n", + " prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)\n", + "\n", + " # Generate 'outputs'\n", + " outputs = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.15, top_k=10, top_p=0.95)\n", + "\n", + " # Save the information\n", + " sentences.append(random_sentence)\n", + " generated_texts.append(outputs[0][\"generated_text\"])\n", + "\n", + "# Display the saved information for debugging purposes\n", + "sentences, generated_texts" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VJc8mpRdgMts" + }, + "outputs": [], + "source": [ + "# 데이터프레임 변환 후 저장\n", + "import pandas as pd\n", + "\n", + "# Create a DataFrame from the lists\n", + "df = pd.DataFrame({\n", + " 'sentences': sentences,\n", + " 'generated_texts': generated_texts\n", + "})\n", + "\n", + "# Save the DataFrame to a CSV file\n", + "csv_file_path = '/content/sentences_generated_10_texts_baseline.csv'\n", + "df.to_csv(csv_file_path, index=True, encoding='utf-8-sig')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wqyRq-e6x8gG" + }, + "source": [ + "박명수 파인튜닝 모델 추론" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_pHtLPJUgMtt" + }, + "source": [ + "peft 라이브러리 설치" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "A9EvFVydyRy8", + "outputId": "402bcf72-a689-40ee-c0de-7022f5493e76" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting peft\n", + " Downloading peft-0.8.2-py3-none-any.whl (183 kB)\n", + "\u001b[?25l \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m0.0/183.4 kB\u001b[0m \u001b[31m?\u001b[0m eta \u001b[36m-:--:--\u001b[0m\r\u001b[2K \u001b[91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[91m╸\u001b[0m\u001b[90m━━\u001b[0m \u001b[32m174.1/183.4 kB\u001b[0m \u001b[31m5.3 MB/s\u001b[0m eta \u001b[36m0:00:01\u001b[0m\r\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m183.4/183.4 kB\u001b[0m \u001b[31m4.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from peft) (1.25.2)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from peft) (23.2)\n", + "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from peft) (5.9.5)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from peft) (6.0.1)\n", + "Requirement already satisfied: torch>=1.13.0 in /usr/local/lib/python3.10/dist-packages (from peft) (2.1.0+cu121)\n", + "Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (from peft) (4.39.0.dev0)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from peft) (4.66.2)\n", + "Requirement already satisfied: accelerate>=0.21.0 in /usr/local/lib/python3.10/dist-packages (from peft) (0.27.2)\n", + "Requirement already satisfied: safetensors in /usr/local/lib/python3.10/dist-packages (from peft) (0.4.2)\n", + "Requirement already satisfied: huggingface-hub>=0.17.0 in /usr/local/lib/python3.10/dist-packages (from peft) (0.20.3)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.17.0->peft) (3.13.1)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.17.0->peft) (2023.6.0)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.17.0->peft) (2.31.0)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.17.0->peft) (4.9.0)\n", + "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft) (1.12)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft) (3.2.1)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft) (3.1.3)\n", + "Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft) (2.1.0)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers->peft) (2023.12.25)\n", + "Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers->peft) (0.15.2)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.13.0->peft) (2.1.5)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.17.0->peft) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.17.0->peft) (3.6)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.17.0->peft) (2.0.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.17.0->peft) (2024.2.2)\n", + "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.13.0->peft) (1.3.0)\n", + "Installing collected packages: peft\n", + "Successfully installed peft-0.8.2\n" + ] + } + ], + "source": [ + "!pip install peft" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "D7rR9sC2A0kW" + }, + "source": [ + "모델 로드 & 추론 테스트" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 241, + "referenced_widgets": [ + "be72f27eedaf48a6a12c17792e5efce3", + "ba476a8ceb3d4e3ebb7f0a784313a14b", + "67c46d7485f5416aa08fa91b0bebd43f", + "2b649c2164a644d9bf1747ed9a052367", + "ca7c1a6af8694ca1a378b594a6f516fc", + "64adb0987c254bb8a58be20cfb8c6669", + "3c989105e663459191ec604cde946dc0", + "49335b0b6e454c05b2a0fa04d1d11647", + "c35223a66a0349a5a8eeedcfba6508db", + "25891feb135c49d999603d53fd99473f", + "d02e75d0718b43c0b130e46fec571196", + "7fde7cb9e4dd440da238a174aee660ec", + "bb77562f060a43e78a575e4d74023332", + "a9965824ab6443d094584325e5858bd9", + "95800625f7e64a8eaf4e714cc877ecb7", + "21330a5ae87c45589e5f12f8c33a2d1a", + "c9bea255437241319d36c0db7881d3b4", + "c677c3224107429bb142dd6e042505e7", + "f51752ff36fa43f1aec821236776ad88", + "1dd52218cf2c4a51a9f32f8fdeda78c2", + "6d678488ee3c4b74b36d57243076ea4a", + "183c3a49e5944086bbbd273df8a98c46", + "d7a9084f07064c9a9beab3568f561733", + "f7ac28642636431cbf234c477bf44e3b", + "bfaf72ee2e0744f5838d8f881966f19d", + "95bbe5a0419e40c28cb9f9bf30ac5b75", + "b50ad8fd85d8402b871f6b54f72eda55", + "d546f527b34e4aaf86699b9c81ace2fa", + "9d171d00d8f64393a3ef571d8a75a98f", + "7076fb50cb254170983810c78966fbfe", + "b6c955c3f33449e5a3ea2d9345a59d39", + "9c49951a4035418c86638a0dd92bc485", + "249e9f1dd95a45899e89666bba124ebf" + ] + }, + "id": "D8wjGmfMAp-V", + "outputId": "2811b3d6-c3e8-4c27-dc09-1361f3712897" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:88: UserWarning: \n", + "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", + "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", + "You will be able to reuse this secret in all of your notebooks.\n", + "Please note that authentication is recommended but still optional to access public models or datasets.\n", + " warnings.warn(\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Loading checkpoint shards: 0%| | 0/8 [00:00\n", + "대답할 때는 네가 학습한 한글 데이터를 우선적으로 참고해.\n", + "<|user|>\n", + "하도급대금 연동제의 계도기간은 언제까지야??\n", + "<|assistant|>\n", + "하도급대금 연동제는 2021년 12월 31일까지 계속됩니다. 2022년 1월 1일부터는 하도급대금 연동제를 대체할 새로운 하도급대금 시스템인 '하도급대금 신제'가 시행됩니다.\n" + ] + } + ], + "source": [ + "messages = [\n", + " {\n", + " \"role\": \"system\",\n", + " \"content\": \"대답할 때는 네가 학습한 한글 데이터를 우선적으로 참고해.\",\n", + " # \"content\": \"너는 한국의 개그맨 박명수야. 대답할 때는 네가 학습한 한글 데이터를 우선해줘.\",\n", + " # \"content\": \"You are a friendly chatbot who always responds Korean\",\n", + " },\n", + " {\"role\": \"user\", \"content\": \"하도급대금 연동제의 계도기간은 언제까지야??\"},\n", + "]\n", + "prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)\n", + "outputs = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.01, top_k=1, top_p=0.95)\n", + "print(outputs[0][\"generated_text\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HVAQifJZgMtu" + }, + "source": [ + "10개 추론" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "O2dnVfQQgMtu" + }, + "outputs": [], + "source": [ + "sentences = []\n", + "generated_texts = []\n", + "\n", + "# 예시로 10문장 임의 생성(ChatGPT)\n", + "random_sentences = [\n", + " \"당신 이름이 뭐에요?\",\n", + " \"어제 본 영화가 생각보다 재미있었어요.\",\n", + " \"저녁에 뭐 먹을까 고민이에요.\",\n", + " \"최근에 새로운 취미를 찾았습니다.\",\n", + " \"주말에는 친구들과 캠핑을 가려고 해요.\",\n", + " \"작년에 비해 올해는 시간이 빨리 가는 것 같아요.\",\n", + " \"점심에는 샐러드를 먹으려고 합니다.\",\n", + " \"요즘 읽고 있는 책이 너무 재미있어서 몰입하고 있어요.\",\n", + " \"오랜만에 운동을 해서 몸이 좀 아픈 것 같아요.\",\n", + " \"내일은 휴일이라서 좀 늦잠을 자려고 합니다.\"\n", + "]\n", + "\n", + "for random_sentence in random_sentences:\n", + " # Update 'messages' list\n", + " messages = [\n", + " {\"role\": \"system\", \"content\": \"대답할 때는 네가 학습한 한글 데이터를 우선적으로 참고해.\"},\n", + " {\"role\": \"user\", \"content\": random_sentence},\n", + " ]\n", + "\n", + " # Create 'prompt'\n", + " prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)\n", + "\n", + " # Generate 'outputs'\n", + " outputs = pipe(prompt, max_new_tokens=256, do_sample=True, temperature=0.15, top_k=10, top_p=0.95)\n", + "\n", + " # Save the information\n", + " sentences.append(random_sentence)\n", + " generated_texts.append(outputs[0][\"generated_text\"])\n", + "\n", + "# Display the saved information for debugging purposes\n", + "sentences, generated_texts" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jThTrYy7gMtu" + }, + "outputs": [], + "source": [ + "# 데이터프레임 변환 후 저장\n", + "import pandas as pd\n", + "\n", + "# Create a DataFrame from the lists\n", + "df = pd.DataFrame({\n", + " 'sentences': sentences,\n", + " 'generated_texts': generated_texts\n", + "})\n", + "\n", + "# Save the DataFrame to a CSV file\n", + "csv_file_path = '/content/sentences_generated_10_texts_pms.csv'\n", + "df.to_csv(csv_file_path, index=True, encoding='utf-8-sig')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ipoj4E-4gMtv" + }, + "source": [ + "(참고) 모델 불러오는 다른 방식" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 49, + "referenced_widgets": [ + "8ca8cc067ccc40ce88f5cf9aa1f81c85", + "735971c55eed4186b29c653a5e90b538", + "cdcd3e852cfb4a848f43a9552c9a9c97", + "65a2b4a358604ca8a28255defe53b211", + "8dbc1820a87142bc92a9ce614e143b7d", + "3e3a4a6462654fedabc355d8a0fb8561", + "aeaf9eed732944f588b39e803750303f", + "6271281581204f489b38b361651f5d18", + "5a31d757edff4f0ca5693dc41a790800", + "a80450fe5f0b42a59b73b1d4857b39d7", + "c1eaef8aeec94b97aa28e020ddbc66b4" + ] + }, + "id": "vluIK8kDx3bw", + "outputId": "1c6c1c0a-bd0a-4419-b853-b2ec2cc6dc8e" + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "8ca8cc067ccc40ce88f5cf9aa1f81c85", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Loading checkpoint shards: 0%| | 0/8 [00:00