diff --git "a/.ipynb_checkpoints/phi-2-custom-checkpoint.ipynb" "b/.ipynb_checkpoints/phi-2-custom-checkpoint.ipynb" --- "a/.ipynb_checkpoints/phi-2-custom-checkpoint.ipynb" +++ "b/.ipynb_checkpoints/phi-2-custom-checkpoint.ipynb" @@ -547,7 +547,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 26, "id": "18d5599f-992d-4d8e-a90c-4d43774be473", "metadata": {}, "outputs": [ @@ -555,19 +555,19 @@ "name": "stdout", "output_type": "stream", "text": [ - "trainable params: 7,864,320 || all params: 2,787,548,160 || trainable%: 0.2821231974697076\n" + "trainable params: 18,350,080 || all params: 2,798,033,920 || trainable%: 0.6558204984162593\n" ] } ], "source": [ - "config = LoraConfig(\n", - " r=16,\n", - " lora_alpha=16,\n", - " target_modules=[\"q_proj\",\"k_proj\",\"v_proj\"],\n", - " lora_dropout=0.05,\n", - " bias=\"none\",\n", - " task_type=\"CAUSAL_LM\"\n", - ")\n", + "# config = LoraConfig(\n", + "# r=16,\n", + "# lora_alpha=16,\n", + "# #target_modules=[\"q_proj\",\"k_proj\",\"v_proj\"],\n", + "# lora_dropout=0.05,\n", + "# bias=\"none\",\n", + "# task_type=\"CAUSAL_LM\"\n", + "# )\n", "\n", "model = get_peft_model(model, config)\n", "model.print_trainable_parameters()" @@ -575,7 +575,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 27, "id": "baeee903-3dce-48b2-93c3-7a697d8c6daf", "metadata": {}, "outputs": [], @@ -587,10 +587,20 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 28, "id": "28a9b24a-a822-4fcb-96b3-d77b7ea30a5f", "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", + "To disable this warning, you can either:\n", + "\t- Avoid using `tokenizers` before the fork if possible\n", + "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" + ] + }, { "name": "stdout", "output_type": "stream", @@ -609,7 +619,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 29, "id": "1ee7fd2a-38e4-4f23-a978-0bdeeda64d8b", "metadata": {}, "outputs": [], @@ -624,7 +634,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 30, "id": "e84c29e2-843e-42c2-8c0f-324d392e671c", "metadata": {}, "outputs": [ @@ -632,7 +642,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Tokenizing data: 100%|██████████| 16412/16412 [00:02<00:00, 6502.58 examples/s]\n" + "Tokenizing data: 100%|██████████| 16412/16412 [00:02<00:00, 7265.30 examples/s]\n" ] }, { @@ -644,7 +654,7 @@ "})" ] }, - "execution_count": 17, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -659,7 +669,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 31, "id": "ac968254-5338-49df-950d-222b82647407", "metadata": {}, "outputs": [], @@ -680,7 +690,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 32, "id": "729df2d0-0890-4ac4-adf3-c167a6e9669d", "metadata": {}, "outputs": [ @@ -719,7 +729,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 33, "id": "525f81a4-eb92-466e-bb9c-cd63122231ab", "metadata": {}, "outputs": [ @@ -737,11 +747,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: jupyterlab in /usr/local/lib/python3.11/site-packages (4.1.0)\n", - "Collecting jupyterlab\n", - " Downloading jupyterlab-4.1.1-py3-none-any.whl.metadata (15 kB)\n", - "Collecting jupyterlab-git\n", - " Downloading jupyterlab_git-0.50.0-py3-none-any.whl.metadata (31 kB)\n", + "Requirement already satisfied: jupyterlab in /usr/local/lib/python3.11/site-packages (4.1.1)\n", + "Requirement already satisfied: jupyterlab-git in /usr/local/lib/python3.11/site-packages (0.50.0)\n", "Requirement already satisfied: async-lru>=1.0.0 in /usr/local/lib/python3.11/site-packages (from jupyterlab) (2.0.4)\n", "Requirement already satisfied: httpx>=0.25.0 in /usr/local/lib/python3.11/site-packages (from jupyterlab) (0.26.0)\n", "Requirement already satisfied: ipykernel in /usr/local/lib/python3.11/site-packages (from jupyterlab) (6.29.1)\n", @@ -754,8 +761,7 @@ "Requirement already satisfied: packaging in /usr/local/lib/python3.11/site-packages (from jupyterlab) (23.2)\n", "Requirement already satisfied: tornado>=6.2.0 in /usr/local/lib/python3.11/site-packages (from jupyterlab) (6.4)\n", "Requirement already satisfied: traitlets in /usr/local/lib/python3.11/site-packages (from jupyterlab) (5.14.1)\n", - "Collecting nbdime~=4.0.1 (from jupyterlab-git)\n", - " Downloading nbdime-4.0.1-py3-none-any.whl.metadata (9.5 kB)\n", + "Requirement already satisfied: nbdime~=4.0.1 in /usr/local/lib/python3.11/site-packages (from jupyterlab-git) (4.0.1)\n", "Requirement already satisfied: nbformat in /usr/local/lib/python3.11/site-packages (from jupyterlab-git) (5.9.2)\n", "Requirement already satisfied: pexpect in /usr/local/lib/python3.11/site-packages (from jupyterlab-git) (4.9.0)\n", "Requirement already satisfied: anyio in /usr/local/lib/python3.11/site-packages (from httpx>=0.25.0->jupyterlab) (4.2.0)\n", @@ -781,14 +787,10 @@ "Requirement already satisfied: json5>=0.9.0 in /usr/local/lib/python3.11/site-packages (from jupyterlab-server<3,>=2.19.0->jupyterlab) (0.9.14)\n", "Requirement already satisfied: jsonschema>=4.18.0 in /usr/local/lib/python3.11/site-packages (from jupyterlab-server<3,>=2.19.0->jupyterlab) (4.21.1)\n", "Requirement already satisfied: requests>=2.31 in /usr/local/lib/python3.11/site-packages (from jupyterlab-server<3,>=2.19.0->jupyterlab) (2.31.0)\n", - "Collecting colorama (from nbdime~=4.0.1->jupyterlab-git)\n", - " Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)\n", - "Collecting gitpython!=2.1.4,!=2.1.5,!=2.1.6 (from nbdime~=4.0.1->jupyterlab-git)\n", - " Downloading GitPython-3.1.41-py3-none-any.whl.metadata (14 kB)\n", - "Collecting jupyter-server-mathjax>=0.2.2 (from nbdime~=4.0.1->jupyterlab-git)\n", - " Downloading jupyter_server_mathjax-0.2.6-py3-none-any.whl (3.1 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m265.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: pygments in /usr/local/lib/python3.11/site-packages (from nbdime~=4.0.1->jupyterlab-git) (2.17.2)\n", + "Requirement already satisfied: colorama in /usr/local/lib/python3.11/site-packages (from nbdime~=4.0.1->jupyterlab-git) (0.4.6)\n", + "Requirement already satisfied: gitpython!=2.1.4,!=2.1.5,!=2.1.6 in /usr/local/lib/python3.11/site-packages (from nbdime~=4.0.1->jupyterlab-git) (3.1.41)\n", + "Requirement already satisfied: jupyter-server-mathjax>=0.2.2 in /usr/local/lib/python3.11/site-packages (from nbdime~=4.0.1->jupyterlab-git) (0.2.6)\n", + "Requirement already satisfied: pygments in /usr/local/lib/python3.11/site-packages (from nbdime~=4.0.1->jupyterlab-git) (2.17.2)\n", "Requirement already satisfied: fastjsonschema in /usr/local/lib/python3.11/site-packages (from nbformat->jupyterlab-git) (2.19.1)\n", "Requirement already satisfied: comm>=0.1.1 in /usr/local/lib/python3.11/site-packages (from ipykernel->jupyterlab) (0.2.1)\n", "Requirement already satisfied: debugpy>=1.6.5 in /usr/local/lib/python3.11/site-packages (from ipykernel->jupyterlab) (1.8.0)\n", @@ -797,8 +799,7 @@ "Requirement already satisfied: nest-asyncio in /usr/local/lib/python3.11/site-packages (from ipykernel->jupyterlab) (1.6.0)\n", "Requirement already satisfied: psutil in /usr/local/lib/python3.11/site-packages (from ipykernel->jupyterlab) (5.9.8)\n", "Requirement already satisfied: ptyprocess>=0.5 in /usr/local/lib/python3.11/site-packages (from pexpect->jupyterlab-git) (0.7.0)\n", - "Collecting gitdb<5,>=4.0.1 (from gitpython!=2.1.4,!=2.1.5,!=2.1.6->nbdime~=4.0.1->jupyterlab-git)\n", - " Downloading gitdb-4.0.11-py3-none-any.whl.metadata (1.2 kB)\n", + "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.11/site-packages (from gitpython!=2.1.4,!=2.1.5,!=2.1.6->nbdime~=4.0.1->jupyterlab-git) (4.0.11)\n", "Requirement already satisfied: decorator in /usr/local/lib/python3.11/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab) (5.1.1)\n", "Requirement already satisfied: jedi>=0.16 in /usr/local/lib/python3.11/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab) (0.19.1)\n", "Requirement already satisfied: prompt-toolkit<3.1.0,>=3.0.41 in /usr/local/lib/python3.11/site-packages (from ipython>=7.23.1->ipykernel->jupyterlab) (3.0.43)\n", @@ -825,8 +826,7 @@ "Requirement already satisfied: argon2-cffi-bindings in /usr/local/lib/python3.11/site-packages (from argon2-cffi->jupyter-server<3,>=2.4.0->jupyterlab) (21.2.0)\n", "Requirement already satisfied: six>=1.9.0 in /usr/local/lib/python3.11/site-packages (from bleach!=5.0.0->nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->jupyterlab) (1.16.0)\n", "Requirement already satisfied: webencodings in /usr/local/lib/python3.11/site-packages (from bleach!=5.0.0->nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->jupyterlab) (0.5.1)\n", - "Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->gitpython!=2.1.4,!=2.1.5,!=2.1.6->nbdime~=4.0.1->jupyterlab-git)\n", - " Downloading smmap-5.0.1-py3-none-any.whl.metadata (4.3 kB)\n", + "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.11/site-packages (from gitdb<5,>=4.0.1->gitpython!=2.1.4,!=2.1.5,!=2.1.6->nbdime~=4.0.1->jupyterlab-git) (5.0.1)\n", "Requirement already satisfied: parso<0.9.0,>=0.8.3 in /usr/local/lib/python3.11/site-packages (from jedi>=0.16->ipython>=7.23.1->ipykernel->jupyterlab) (0.8.3)\n", "Requirement already satisfied: fqdn in /usr/local/lib/python3.11/site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.9.0->jupyter-server<3,>=2.4.0->jupyterlab) (1.5.1)\n", "Requirement already satisfied: isoduration in /usr/local/lib/python3.11/site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.9.0->jupyter-server<3,>=2.4.0->jupyterlab) (20.11.0)\n", @@ -842,23 +842,6 @@ "Requirement already satisfied: pycparser in /usr/local/lib/python3.11/site-packages (from cffi>=1.0.1->argon2-cffi-bindings->argon2-cffi->jupyter-server<3,>=2.4.0->jupyterlab) (2.21)\n", "Requirement already satisfied: arrow>=0.15.0 in /usr/local/lib/python3.11/site-packages (from isoduration->jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.9.0->jupyter-server<3,>=2.4.0->jupyterlab) (1.3.0)\n", "Requirement already satisfied: types-python-dateutil>=2.8.10 in /usr/local/lib/python3.11/site-packages (from arrow>=0.15.0->isoduration->jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.9.0->jupyter-server<3,>=2.4.0->jupyterlab) (2.8.19.20240106)\n", - "Downloading jupyterlab-4.1.1-py3-none-any.whl (11.4 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m11.4/11.4 MB\u001b[0m \u001b[31m434.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading jupyterlab_git-0.50.0-py3-none-any.whl (1.2 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m546.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading nbdime-4.0.1-py3-none-any.whl (5.9 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.9/5.9 MB\u001b[0m \u001b[31m279.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading GitPython-3.1.41-py3-none-any.whl (196 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m196.4/196.4 kB\u001b[0m \u001b[31m474.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading gitdb-4.0.11-py3-none-any.whl (62 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.7/62.7 kB\u001b[0m \u001b[31m406.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hDownloading smmap-5.0.1-py3-none-any.whl (24 kB)\n", - "Installing collected packages: smmap, colorama, gitdb, gitpython, jupyter-server-mathjax, nbdime, jupyterlab, jupyterlab-git\n", - " Attempting uninstall: jupyterlab\n", - " Found existing installation: jupyterlab 4.1.0\n", - " Uninstalling jupyterlab-4.1.0:\n", - " Successfully uninstalled jupyterlab-4.1.0\n", - "Successfully installed colorama-0.4.6 gitdb-4.0.11 gitpython-3.1.41 jupyter-server-mathjax-0.2.6 jupyterlab-4.1.1 jupyterlab-git-0.50.0 nbdime-4.0.1 smmap-5.0.1\n", "Note: you may need to restart the kernel to use updated packages.\n" ] } @@ -912,9 +895,11 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 34, "id": "3bf553b6-b26c-49c3-9407-74c8d53a395e", - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [ { "data": { @@ -923,7 +908,7 @@ "
\n", " \n", " \n", - " [1100/1100 03:27, Epoch 0/1]\n", + " [1100/1100 12:52, Epoch 0/1]\n", "
\n", " \n", " \n", @@ -935,47 +920,47 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", "
1001.4731001.317300
2001.2361001.162700
3001.2248001.160700
4001.1579001.096200
5001.1778001.111000
6001.1505001.088400
7001.1781001.122100
8001.1664001.102400
9001.1384001.067200
10001.1440001.079000
11001.1633001.097900

" @@ -987,13 +972,20 @@ "metadata": {}, "output_type": "display_data" }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Checkpoint destination directory ./checkpoint-1100 already exists and is non-empty.Saving will proceed but saved results may be invalid.\n" + ] + }, { "data": { "text/plain": [ - "TrainOutput(global_step=1100, training_loss=1.200930439342152, metrics={'train_runtime': 208.0362, 'train_samples_per_second': 21.15, 'train_steps_per_second': 5.288, 'total_flos': 3.5907056959488e+16, 'train_loss': 1.200930439342152, 'epoch': 0.27})" + "TrainOutput(global_step=1100, training_loss=1.1277108417857777, metrics={'train_runtime': 773.3658, 'train_samples_per_second': 5.689, 'train_steps_per_second': 1.422, 'total_flos': 3.6048790880256e+16, 'train_loss': 1.1277108417857777, 'epoch': 0.27})" ] }, - "execution_count": 23, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } @@ -1010,7 +1002,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 35, "id": "263cc15e-8e9d-4bd8-9708-ec1638bc1165", "metadata": {}, "outputs": [ @@ -1018,9 +1010,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00, 1.34s/it]\n", - "adapter_config.json: 100%|██████████| 592/592 [00:00<00:00, 3.99MB/s]\n", - "adapter_model.safetensors: 100%|██████████| 31.5M/31.5M [00:00<00:00, 117MB/s]\n" + "Loading checkpoint shards: 100%|██████████| 2/2 [00:03<00:00, 1.90s/it]\n", + "adapter_config.json: 100%|██████████| 613/613 [00:00<00:00, 3.83MB/s]\n", + "adapter_model.safetensors: 100%|██████████| 73.4M/73.4M [00:00<00:00, 110MB/s]\n" ] }, { @@ -1054,7 +1046,7 @@ ")" ] }, - "execution_count": 24, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -1071,7 +1063,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 36, "id": "8eef4f4f-52da-4ba9-8a22-2b7874420562", "metadata": { "scrolled": true @@ -1081,444 +1073,559 @@ "name": "stderr", "output_type": "stream", "text": [ + "README.md: 100%|██████████| 5.18k/5.18k [00:00<00:00, 17.1MB/s]\n", "model-00001-of-00003.safetensors: 0%| | 0.00/4.98G [00:00