{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "id": "FuXIFTFapAMI", "outputId": "c8ced1ad-c7b3-44ba-807b-26d7d13906bc" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", "\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", "\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", "\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", "\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" ] } ], "source": [ "# You only need to run this once per machine\n", "!pip install -q -U bitsandbytes\n", "!pip install -q -U git+https://github.com/huggingface/transformers.git\n", "!pip install -q -U git+https://github.com/huggingface/peft.git\n", "!pip install -q -U git+https://github.com/huggingface/accelerate.git\n", "!pip install -q -U datasets scipy ipywidgets matplotlib" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "id": "s6f4z8EYmcJ6" }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "0bafa7b9bdc443b2a435c6e25d892591", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading data files: 0%| | 0/1 [00:00" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import matplotlib.pyplot as plt\n", "\n", "def plot_data_lengths(tokenize_train_dataset, tokenized_val_dataset):\n", " lengths = [len(x['input_ids']) for x in tokenized_train_dataset]\n", " lengths += [len(x['input_ids']) for x in tokenized_val_dataset]\n", " print(len(lengths))\n", "\n", " # Plotting the histogram\n", " plt.figure(figsize=(10, 6))\n", " plt.hist(lengths, bins=20, alpha=0.7, color='blue')\n", " plt.xlabel('Length of input_ids')\n", " plt.ylabel('Frequency')\n", " plt.title('Distribution of Lengths of input_ids')\n", " plt.show()\n", "\n", "plot_data_lengths(tokenized_train_dataset, tokenized_val_dataset)" ] }, { 
"cell_type": "markdown", "metadata": { "id": "nBk4Qp_vyRgh" }, "source": [ "From here, you can choose where you'd like to set the `max_length` to be. You can truncate and pad training examples to fit them to your chosen size. Be aware that choosing a larger `max_length` has its compute tradeoffs.\n", "\n", "I'm using my personal notes to train the model, and they vary greatly in length. I spent some time cleaning the dataset so the samples were about the same length, cutting up individual notes if needed, but being sure to not cut in the middle of a word or sentence." ] }, { "cell_type": "markdown", "metadata": { "id": "bMlw8h743m19" }, "source": [ "Now let's tokenize again with padding and truncation, and set up the tokenize function to make labels and input_ids the same. This is basically what [self-supervised fine-tuning is](https://neptune.ai/blog/self-supervised-learning)." ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "id": "BA8M9yfC3m19", "outputId": "99c6d302-9bb6-47b1-cae9-a1cd870b4770" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "981\n" ] }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAA1IAAAIjCAYAAAAJLyrXAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAABHyUlEQVR4nO3deVxVdf7H8fcFZBcQlU0JTXFfc4vRTBNFJcrRGZexUkdzKizX8mebWpmTlakt2tQk2Z6VlU5auE+OmppmmpGYSyaIowFiCgjf3x89uNMVVA4i9wKv5+NxH3m+53vO+ZzLYXn3Ped7bcYYIwAAAABAqbk5uwAAAAAAqGwIUgAAAABgEUEKAAAAACwiSAEAAACARQQpAAAAALCIIAUAAAAAFhGkAAAAAMAighQAAAAAWESQAgAAAACLCFIAqr0ZM2bIZrNVyLF69OihHj162JfXr18vm82mDz74oEKOP3LkSDVo0KBCjlVWOTk5GjNmjMLCwmSz2TRhwgRnl1TuKvrrfjmrVq1Su3bt5O3tLZvNpszMzBL7JSUlyWaz6dChQxVa39Vg5VwaNGigkSNHXvWaAFQuBCkAVUrRH0dFL29vb0VERCguLk4LFizQ6dOny+U4x44d04wZM7Rr165y2V95cuXaSuPJJ59UUlKS7r77br3xxhu6/fbbL9q3QYMGuvnmmyuwOmvefvttzZs3z9llXNLJkyc1ePBg+fj46MUXX9Qbb7whPz8/Z5dVKt99951mzJhRJYIdgMrHw9kFAMDV8Nhjj6lhw4bKz89Xenq61q9frwkTJmju3Ln69NNP1aZNG3vfhx9+WP/3f/9naf/Hjh3TzJkz1aBBA7Vr167U233xxReWjlMWl6rtlVdeUWFh4VWv4UqsXbtW119/vaZPn+7sUq7Y22+/rT179rj0qNq2bdt0+vRpPf7444qNjb1k39tvv11Dhw6Vl5dXBVV3ad99951mzpypHj16WB5pdbVzAVD5EKQAVEn9+vVTx44d7cvTpk3T2rVrdfPNN+uWW27Rvn375OPjI0ny8PCQh8fV/XH466+/ytfXV56enlf1OJdTo0YNpx6/NDIyMtSiRQtnl1FtZGRkSJKCgoIu29fd3V3u7u5XuaKKUZXOBYBzcGsfgGrjpptu0iOPPKLDhw/rzTfftLeX9IxUcnKyunXrpqCgIPn7+6tp06Z68MEHJf32fEunTp0kSaNGjbLfRpiUlCTpt+egWrVqpR07dqh79+7y9fW1b3vhM1JFCgoK9OCDDyosLEx+fn665ZZb9NNPPzn0udhzGr/f5+VqK+kZqTNnzmjy5MmKjIyUl5eXmjZtqmeeeUbGGId+NptN48aN08cff6xWrVrJy8tLLVu21KpVq0p+wy+QkZGh0aNHKzQ0VN7e3mrbtq1ef/11+/qi54YOHjyof/3rX/bay+O2rTfffFMdOnSQj4+PgoODNXTo0GLvb9HX7bvvvlPPnj3l6+urevXqac6cOcX2d/jwYd1yyy3y8/NTSEiIJk6cqM8//1w2m03r16+37+9f//qXDh8+bD+XC9/7wsJCzZo1S/Xr15e3t7d69eql1NRUhz779+/XoEGDFBYWJm9vb9WvX19Dhw5VVlbWZc976dKl9vOuU6eObrvtNv38888O5zxixAhJUqdOnWSz2S75LFBJzxUV3V755ZdfqnPnzvL29ta1116rJUuWlLjtxo0b9be//U21a9dWQECA7rjjDv3yyy8OfW02m2bMmFHs+L//HkhKStKf//xnSVLPnj3t73HR+385JZ2LMUZPPPGE6tevL19fX/Xs2VN79+4ttm1+fr5mzpyp6OhoeXt7q3bt2urWrZuSk5NLdWwAVQMjUgCqldtvv10PPvigvvjiC915550l9tm7d69uvvlmtWnTRo899pi8vLyUmpqqTZs2SZKaN2+uxx57TI8++qjGjh2rG264QZL0hz/8wb6PkydPql+/fho6dKhuu+02hYaGXrKuWbNmyWazaerUqcrIyNC8efMUGxurXbt22UfOSqM0tf2eMUa33HKL1q1bp9GjR6tdu3b6/PPPdf/99+vnn3/Wc88959D/yy+/1EcffaR77rlHNWvW1IIFCzRo0CAdOXJEtWvXvmhdZ8+eVY8ePZSamqpx48apYcOGWrp0qUaOHKnMzEyNHz9ezZs31xtvvKGJEyeqfv36mjx5siSpbt26pT7/ksyaNUuPPPKIBg8erDFjxujEiRN6/vnn1b17d+3cudNhJOaXX35R3759NXDgQA0ePFgffPCBpk6dqtatW6tfv36SfgueN910k9LS0jR+/HiFhYXp7bff1rp16xyO+9BDDykrK0tHjx61v4/+/v4Off7+97/Lzc1NU6ZMUVZWlubMmaPhw4dr69atkqS8vDzFxcUpNzdX995
7r8LCwvTzzz9rxYoVyszMVGBg4EXPOykpSaNGjVKnTp00e/ZsHT9+XPPnz9emTZvs5/3QQw+padOm+sc//mG/HbZRo0aW3+PU1FT96U9/0ujRozVixAi99tprGjlypDp06KCWLVs69B03bpyCgoI0Y8YMpaSkaOHChTp8+LA9SJdW9+7ddd9992nBggV68MEH1bx5c0my/7csHn30UT3xxBPq37+/+vfvr6+//lp9+vRRXl6eQ78ZM2Zo9uzZGjNmjDp37qzs7Gxt375dX3/9tXr37l3m4wOoZAwAVCGLFy82ksy2bdsu2icwMNC0b9/evjx9+nTz+x+Hzz33nJFkTpw4cdF9bNu2zUgyixcvLrbuxhtvNJLMokWLSlx344032pfXrVtnJJl69eqZ7Oxse/v7779vJJn58+fb26KiosyIESMuu89L1TZixAgTFRVlX/7444+NJPPEE0849PvTn/5kbDabSU1NtbdJMp6eng5t33zzjZFknn/++WLH+r158+YZSebNN9+0t+Xl5ZmYmBjj7+/vcO5RUVEmPj7+kvsrbd9Dhw4Zd3d3M2vWLIf2b7/91nh4eDi0F33dlixZYm/Lzc01YWFhZtCgQfa2Z5991kgyH3/8sb3t7NmzplmzZkaSWbdunb09Pj7e4f0uUvR1b968ucnNzbW3z58/30gy3377rTHGmJ07dxpJZunSpZd/M34nLy/PhISEmFatWpmzZ8/a21esWGEkmUcffdTeVprvmQv7Hjx40N4WFRVlJJmNGzfa2zIyMoyXl5eZPHlysW07dOhg8vLy7O1z5swxkswnn3xib5Nkpk+fXuz4F34PLF26tNh7XloXnktGRobx9PQ08fHxprCw0N7vwQcfNJIcjtu2bdtSX6MAqi5u7QNQ7fj7+19y9r6iEYpPPvmkzBMzeHl5adSoUaXuf8cdd6hmzZr25T/96U8KDw/XZ599Vqbjl9Znn30md3d33XfffQ7tkydPljFGK1eudGiPjY11GLFo06aNAgIC9OOPP172OGFhYRo2bJi9rUaNGrrvvvuUk5OjDRs2lMPZFPfRRx+psLBQgwcP1n//+1/7KywsTNHR0cVGkfz9/XXbbbfZlz09PdW5c2eH81u1apXq1aunW265xd7m7e190RHOSxk1apTDc3NFI4hFxysacfr888/166+/lnq/27dvV0ZGhu655x55e3vb2+Pj49WsWTP961//slzrpbRo0cJeu/TbKGLTpk1LvC7Gjh3r8Kze3XffLQ8Pj6t+rV/O6tWrlZeXp3vvvddhZKykiUKCgoK0d+9e7d+/vwIrBOBqCFIAqp2cnByH0HKhIUOGqGvXrhozZoxCQ0M1dOhQvf/++5ZCVb169SxNLBEdHe2wbLPZ1Lhx46s+rfPhw4cVERFR7P0ouj3q8OHDDu3XXHNNsX3UqlWr2DMuJR0nOjpabm6Ov3Yudpzysn//fhljFB0drbp16zq89u3bZ59ooUj9+vWL3V524fkdPnxYjRo1KtavcePGluu78P2sVauWJNmP17BhQ02aNEmvvvqq6tSpo7i4OL344ouXfT6q6P1s2rRpsXXNmjUr9/fbynVx4bXu7++v8PBwp09hXvSeXFhf3bp17V+XIo899pgyMzPVpEkTtW7dWvfff792795dYbUCcA0EKQDVytGjR5WVlXXJP3p9fHy0ceNGrV69Wrfffrt2796tIUOGqHfv3iooKCjVcaw811RaF3t+pLQ1lYeLzXJmLpiYwlUUFhbKZrNp1apVSk5OLvZ6+eWXHfpX9PmV5njPPvusdu/erQcffFBnz57Vfffdp5YtW+ro0aNXpaayqKj3rSKv9Uvp3r27Dhw4oNdee02tWrXSq6++quuuu06vvvqqs0sDUIEIUgCqlTfeeEOSFBcXd8l+bm5u6tWrl+bOnavvvvtOs2bN0tq1a+23gll5KL40LrxFyBij1NRUh1neatWqpczMzGLbXji6YKW2qKgoHTt2rNitjt9//719fXmIiorS/v37i43qlfdxLtSoUSMZY9SwYUPFxsYWe11//fWW9xkVFaUDBw4UCwkXzrYnld910rp1az388MPauHGj/v3vf+vnn3/WokWLLlmjJKWkpBRbl5KSctXe79K48FrPyclRWlraZa/1vLw8paWlObSV5/dh0XtyYX0nTpwocWQtODhYo0aN0jvvvKOffvpJbdq0KXGmQQBVF0EKQLWxdu1aPf7442rYsKGGDx9+0X6nTp0q1lb0wba5ubmSJD8/P0kqMdiUxZIlSxzCzAcffKC0tDT7THHSb6Fgy5YtDjOIrVixotg03lZq69+/vwoKCvTCCy84tD/33HOy2WwOx78S/fv3V3p6ut577z172/nz5/X888/L399fN954Y7kc50IDBw6Uu7u7Zs6cWSz4GGN08uRJy/uMi4vTzz//rE8//dTedu7cOb3yyivF+vr5+ZVqmvKLyc7O1vnz5x3aWrduLTc3N/u1WJKOHTsqJCREixYtcui3cuVK7du3T/Hx8WWu6Ur94x//UH5+vn154cKFOn/+fLFrfePGjcW2u3BEqjy/D2NjY1WjRg09//zzDtfKvHnzivW98Lrx9/dX48aNL/k1AVD1MP05gCpp5cqV+v7773X+/HkdP35ca9euVXJysqKiovTpp586PIB/occee0wbN25UfHy8oqKilJGRoZdeekn169dXt27dJP32h15QUJAWLVqkmjVrys/PT126dFHDhg3LVG9wcLC6deumUaNG6fjx45o3b54aN27sMIHBmDFj9MEHH6hv374aPHiwDhw4oDfffLPYdNVWaktISFDPnj310EMP6dChQ2rbtq2++OILffLJJ5owYUKZpsIuydixY/Xyyy9r5MiR2rFjhxo0aKAPPvhAmzZt0rx58y75zNrlpKam6oknnijW3r59e8XHx+uJJ57QtGnTdOjQIQ0YMEA1a9bUwYMHtWzZMo0dO1ZTpkyxdLy//e1veuGFFzRs2DCNHz9e4eHheuutt+zX1O9HSTp06KD33ntPkyZNUqdOneTv76+EhIRSH2vt2rUaN26c/vznP6tJkyY6f/683njjDbm7u2vQoEEX3a5GjRp66qmnNGrUKN14440aNmyYffrzBg0aaOLEiZbOuTzl5eWpV69eGjx4sFJSUvTSSy+pW7duDpN3jBkzRnfddZcGDRqk3r1765tvvtHnn3+uOnXqOOyrXbt2cnd311NPPaWsrCx5eXnppptuUkhIiOW66tatqylTpmj27Nm6+eab1b9/f+3cuVMrV64sdtwWLVqoR48e6tChg4KDg7V9+3Z98MEHGjduXNneFACVk3MmCwSAq6NoSuOil6enpwkLCzO9e/c28+fPd5hmu8iF05+vWbPG3HrrrSYiIsJ4enqaiIgIM2zYMPPDDz84bPfJJ5+YFi1aGA8PD4fpxm+88UbTsmXLEuu72PTn77zzjpk2bZoJCQkxPj4+Jj4+3hw+fLjY9s8++6ypV6+e8fLyMl27djXbt28vts9L1Xbh9OfGGHP69GkzceJEExERYWrUqG
Gio6PN008/7TAFtDG/TUmdmJhYrKaLTct+oePHj5tRo0aZOnXqGE9PT9O6desSp2i3Ov3577/ev3+NHj3a3u/DDz803bp1M35+fsbPz880a9bMJCYmmpSUFHufi33dSnrPfvzxRxMfH298fHxM3bp1zeTJk82HH35oJJktW7bY++Xk5Ji//OUvJigoyEiy76fo637htOYHDx50+Hr9+OOP5q9//atp1KiR8fb2NsHBwaZnz55m9erVpXp/3nvvPdO+fXvj5eVlgoODzfDhw83Ro0cd+pTH9Oclfb0uvC6Ltt2wYYMZO3asqVWrlvH39zfDhw83J0+edNi2oKDATJ061dSpU8f4+vqauLg4k5qaWuK19sorr5hrr73WuLu7W5oKvaRzKSgoMDNnzjTh4eHGx8fH9OjRw+zZs6fYcZ944gnTuXNnExQUZHx8fEyzZs3MrFmzHKZ1B1D12Yxx0SeEAQCoRObNm6eJEyfq6NGjqlevnrPLcTlFHxC8bds2dezY0dnlAMAV4xkpAAAsOnv2rMPyuXPn9PLLLys6OpoQBQDVBM9IAQBg0cCBA3XNNdeoXbt2ysrK0ptvvqnvv/9eb731lrNLq/ZycnKUk5NzyT5169a96JTtAFBaBCkAACyKi4vTq6++qrfeeksFBQVq0aKF3n33XQ0ZMsTZpVV7zzzzjGbOnHnJPgcPHnSYbh0AyoJnpAAAQJXx448/6scff7xkn27dul1y5k4AKA2CFAAAAABYxGQTAAAAAGARz0hJKiws1LFjx1SzZk2HD1IEAAAAUL0YY3T69GlFRETIze3i404EKUnHjh1TZGSks8sAAAAA4CJ++ukn1a9f/6LrCVKSatasKem3NysgIMDJ1QAAAABwluzsbEVGRtozwsUQpCT77XwBAQEEKQAAAACXfeSHySYAAAAAwCKCFAAAAABYRJACAAAAAIsIUgAAAABgEUEKAAAAACwiSAEAAACARQQpAAAAALCIIAUAAAAAFhGkAAAAAMAighQAAAAAWESQAgAAAACLCFIAAAAAYBFBCgAAAAAsIkgBAAAAgEUEKQAAAACwiCAFAAAAABYRpAAAAADAIoIUAAAAAFhEkAIAAAAAizycXQBcW0KCsyv4n+XLnV0BAAAA8BtGpAAAAADAIoIUAAAAAFhEkAIAAAAAiwhSAAAAAGARQQoAAAAALCJIAQAAAIBFBCkAAAAAsIggBQAAAAAWEaQAAAAAwCKCFAAAAABYRJACAAAAAIsIUgAAAABgEUEKAAAAACwiSAEAAACARQQpAAAAALCIIAUAAAAAFhGkAAAAAMAipwap2bNnq1OnTqpZs6ZCQkI0YMAApaSkOPTp0aOHbDabw+uuu+5y6HPkyBHFx8fL19dXISEhuv/++3X+/PmKPBUAAAAA1YiHMw++YcMGJSYmqlOnTjp//rwefPBB9enTR9999538/Pzs/e6880499thj9mVfX1/7vwsKChQfH6+wsDD95z//UVpamu644w7VqFFDTz75ZIWeDwAAAIDqwalBatWqVQ7LSUlJCgkJ0Y4dO9S9e3d7u6+vr8LCwkrcxxdffKHvvvtOq1evVmhoqNq1a6fHH39cU6dO1YwZM+Tp6Vlsm9zcXOXm5tqXs7Ozy+mMAAAAAFQHLvWMVFZWliQpODjYof2tt95SnTp11KpVK02bNk2//vqrfd3mzZvVunVrhYaG2tvi4uKUnZ2tvXv3lnic2bNnKzAw0P6KjIy8CmcDAAAAoKpy6ojU7xUWFmrChAnq2rWrWrVqZW//y1/+oqioKEVERGj37t2aOnWqUlJS9NFHH0mS0tPTHUKUJPtyenp6iceaNm2aJk2aZF/Ozs4mTAEAAAAoNZcJUomJidqzZ4++/PJLh/axY8fa/926dWuFh4erV69eOnDggBo1alSmY3l5ecnLy+uK6gUAAABQfbnErX3jxo3TihUrtG7dOtWvX/+Sfbt06SJJSk1NlSSFhYXp+PHjDn2Kli/2XBUAAAAAXAmnBiljjMaNG6dly5Zp7dq1atiw4WW32bVrlyQpPDxckhQTE6Nvv/1WGRkZ9j7JyckKCAhQixYtrkrdAAAAAKo3p97al5iYqLfffluffPKJatasaX+mKTAwUD4+Pjpw4IDefvtt9e/fX7Vr19bu3bs1ceJEde/eXW3atJEk9enTRy1atNDtt9+uOXPmKD09XQ8//LASExO5fQ8AAADAVeHUEamFCxcqKytLPXr0UHh4uP313nvvSZI8PT21evVq9enTR82aNdPkyZM1aNAgLV++3L4Pd3d3rVixQu7u7oqJidFtt92mO+64w+FzpwAAAACgPDl1RMoYc8n1kZGR2rBhw2X3ExUVpc8++6y8ygIAAACAS3KJySYAAAAAoDIhSAEAAACARQQpAAAAALCIIAUAAAAAFhGkAAAAAMAighQAAAAAWESQAgAAAACLCFIAAAAAYBFBCgAAAAAsIkgBAAAAgEUEKQAAAACwiCAFAAAAABYRpAAAAADAIoIUAAAAAFhEkAIAAAAAiwhSAAAAAGARQQoAAAAALCJIAQAAAIBFBCkAAAAAsIggBQAAAAAWEaQAAAAAwCKCFAAAAABYRJACAAAAAIsIUgAAAABgEUEKAAAAACwiSAEAAACARQQpAAAAALCIIAUAAAAAFhGkAAAAAMAighQAAAAAWESQAgAAAACLCFIAAAAAYBFBCgAAAAAsIkgBAAAAgEUEKQAAAACwiCAFAAAAABYRpAAAAADAIoIUAAAAAFhEkAIAAAAAiwhSAAAAAGARQQoAAAAALCJIAQAAAIBFBCkAAAAAsIggBQAAAAAWEaQAAAAAwCKCFAAAAABYRJACAAAAAIsIUgAAAABgEUEKAAAAACwiSAEAAACARQQpAAAAALCIIAUAAAAAFhGkAAAAAMAighQAAAAAWESQAgAAAACLCFIAAAAAYBFBCgAAAAAsIkgBAAAAgEUEKQAAAACwiCAFAAAAABYRpAAAAADAIoIUAAAAAFhEkAIAAAAAiwhSAAAAAGARQQoAAAAALCJIAQAAAIBFBCkAAAAAsIggBQAAAAAWEaQAAAAAwCKCFAAAAABYRJACAAAAAIsIUgAAAABgkVOD1OzZs9WpUyfVrFlTISEhGjBggFJSUhz6nDt3TomJiapdu7b8/f01aNAgHT9+3KHPkSNHFB8fL19fX4WEhOj+++/X+fPnK/JUAAAAAFQjTg1SGzZsUGJiorZs2aLk5GTl5+erT58+OnPmjL3PxIkTtXz5ci1dulQbNmzQsWPHNHDgQPv6goICxcfHKy8vT//5z3/0+uuvKykpSY8++qgzTgkAAABANWAzxhhnF1HkxIkTCgkJ0YYNG9S9e3dlZWWpbt26evvtt/WnP/1JkvT999+refPm2rx5s66//nqtXLlSN998s44dO6bQ0FBJ0qJFizR16lSdOHFCnp6elz1udna2AgMDlZWVpYCAgKt6jpVNQoKzK/if5cudXQEAAACqutJmA5d6RiorK0uSFBwcLEnasWOH8vPzFRsba+/TrFkzXXPNNdq8e
bMkafPmzWrdurU9RElSXFycsrOztXfv3hKPk5ubq+zsbIcXAAAAAJSWywSpwsJCTZgwQV27dlWrVq0kSenp6fL09FRQUJBD39DQUKWnp9v7/D5EFa0vWleS2bNnKzAw0P6KjIws57MBAAAAUJW5TJBKTEzUnj179O677171Y02bNk1ZWVn2108//XTVjwkAAACg6vBwdgGSNG7cOK1YsUIbN25U/fr17e1hYWHKy8tTZmamw6jU8ePHFRYWZu/z1VdfOeyvaFa/oj4X8vLykpeXVzmfBQAAAIDqwqkjUsYYjRs3TsuWLdPatWvVsGFDh/UdOnRQjRo1tGbNGntbSkqKjhw5opiYGElSTEyMvv32W2VkZNj7JCcnKyAgQC1atKiYEwEAAABQrTh1RCoxMVFvv/22PvnkE9WsWdP+TFNgYKB8fHwUGBio0aNHa9KkSQoODlZAQIDuvfdexcTE6Prrr5ck9enTRy1atNDtt9+uOXPmKD09XQ8//LASExMZdQIAAABwVTg1SC1cuFCS1KNHD4f2xYsXa+TIkZKk5557Tm5ubho0aJByc3MVFxenl156yd7X3d1dK1as0N13362YmBj5+flpxIgReuyxxyrqNAAAAABUMy71OVLOwudIXRyfIwUAAIDqpFJ+jhQAAAAAVAYEKQAAAACwiCAFAAAAABYRpAAAAADAIoIUAAAAAFhEkAIAAAAAiwhSAAAAAGARQQoAAAAALCJIAQAAAIBFBCkAAAAAsIggBQAAAAAWEaQAAAAAwCKCFAAAAABYRJACAAAAAIsIUgAAAABgEUEKAAAAACwiSAEAAACARQQpAAAAALCIIAUAAAAAFhGkAAAAAMAighQAAAAAWESQAgAAAACLCFIAAAAAYJGHswsASishwdkVOFq+3NkVAAAAwFkYkQIAAAAAiwhSAAAAAGARQQoAAAAALCJIAQAAAIBFBCkAAAAAsIggBQAAAAAWEaQAAAAAwCKCFAAAAABYRJACAAAAAIsIUgAAAABgEUEKAAAAACwiSAEAAACARQQpAAAAALCIIAUAAAAAFhGkAAAAAMAighQAAAAAWESQAgAAAACLCFIAAAAAYBFBCgAAAAAsIkgBAAAAgEUEKQAAAACwiCAFAAAAABYRpAAAAADAIoIUAAAAAFhEkAIAAAAAiwhSAAAAAGARQQoAAAAALCJIAQAAAIBFBCkAAAAAsIggBQAAAAAWEaQAAAAAwCKCFAAAAABYRJACAAAAAIsIUgAAAABgEUEKAAAAACwiSAEAAACARQQpAAAAALCIIAUAAAAAFhGkAAAAAMAighQAAAAAWFSmIPXjjz+Wdx0AAAAAUGmUKUg1btxYPXv21Jtvvqlz586Vd00AAAAA4NLKFKS+/vprtWnTRpMmTVJYWJj+9re/6auvvirv2gAAAADAJZUpSLVr107z58/XsWPH9NprryktLU3dunVTq1atNHfuXJ04caK86wQAAAAAl3FFk014eHho4MCBWrp0qZ566imlpqZqypQpioyM1B133KG0tLTyqhMAAAAAXMYVBant27frnnvuUXh4uObOnaspU6bowIEDSk5O1rFjx3TrrbeWV50AAAAA4DI8yrLR3LlztXjxYqWkpKh///5asmSJ+vfvLze333JZw4YNlZSUpAYNGpRnrQAAAADgEsoUpBYuXKi//vWvGjlypMLDw0vsExISon/+859XVBwAAAAAuKIyBan9+/dfto+np6dGjBhRlt0DAAAAgEsr0zNSixcv1tKlS4u1L126VK+//nqp97Nx40YlJCQoIiJCNptNH3/8scP6kSNHymazObz69u3r0OfUqVMaPny4AgICFBQUpNGjRysnJ6cspwUAAAAApVKmIDV79mzVqVOnWHtISIiefPLJUu/nzJkzatu2rV588cWL9unbt6/S0tLsr3feecdh/fDhw7V3714lJydrxYoV2rhxo8aOHVv6kwEAAAAAi8p0a9+RI0fUsGHDYu1RUVE6cuRIqffTr18/9evX75J9vLy8FBYWVuK6ffv2adWqVdq2bZs6duwoSXr++efVv39/PfPMM4qIiCh1LQAAAABQWmUakQoJCdHu3buLtX/zzTeqXbv2FRf1e+vXr1dISIiaNm2qu+++WydPnrSv27x5s4KCguwhSpJiY2Pl5uamrVu3XnSfubm5ys7OdngBAAAAQGmVKUgNGzZM9913n9atW6eCggIVFBRo7dq1Gj9+vIYOHVpuxfXt21dLlizRmjVr9NRTT2nDhg3q16+fCgoKJEnp6ekKCQlx2MbDw0PBwcFKT0+/6H5nz56twMBA+ysyMrLcagYAAABQ9ZXp1r7HH39chw4dUq9eveTh8dsuCgsLdccdd1h6Rupyfh/KWrdurTZt2qhRo0Zav369evXqVeb9Tps2TZMmTbIvZ2dnE6YAAAAAlFqZgpSnp6fee+89Pf744/rmm2/k4+Oj1q1bKyoqqrzrc3DttdeqTp06Sk1NVa9evRQWFqaMjAyHPufPn9epU6cu+lyV9NtzV15eXle1VgAAAABVV5mCVJEmTZqoSZMm5VXLZR09elQnT560fwhwTEyMMjMztWPHDnXo0EGStHbtWhUWFqpLly4VVhcAAACA6qVMQaqgoEBJSUlas2aNMjIyVFhY6LB+7dq1pdpPTk6OUlNT7csHDx7Url27FBwcrODgYM2cOVODBg1SWFiYDhw4oAceeECNGzdWXFycJKl58+bq27ev7rzzTi1atEj5+fkaN26chg4dyox9AAAAAK6aMgWp8ePHKykpSfHx8WrVqpVsNluZDr59+3b17NnTvlz03NKIESO0cOFC7d69W6+//royMzMVERGhPn366PHHH3e4Le+tt97SuHHj1KtXL7m5uWnQoEFasGBBmeoBAAAAgNKwGWOM1Y3q1KmjJUuWqH///lejpgqXnZ2twMBAZWVlKSAgwNnluJSEBGdX4LqWL3d2BQAAAChvpc0GZZr+3NPTU40bNy5zcQAAAABQmZUpSE2ePFnz589XGQazAAAAAKDSK9MzUl9++aXWrVunlStXqmXLlqpRo4bD+o8++qhcigMAAAAAV1SmIBUUFKQ//vGP5V0LAAAAAFQKZQpSixcvLu86AAAAAKDSKNMzUpJ0/vx5rV69Wi+//LJOnz4tSTp27JhycnLKrTgAAAAAcEVlGpE6fPiw+vbtqyNHjig3N1e9e/dWzZo19dRTTyk3N1eLFi0q7zoBAAAAwGWUaURq/Pjx6tixo3755Rf5+PjY2//4xz9qzZo15VYcAAAAALiiMo1I/fvf/9Z//vMfeXp6OrQ3aNBAP//8c7kUBgAAAACuqkwjUoWFhSooKCjWfvToUdWsWfOKiwIAAAAAV1amINWnTx/NmzfPvmyz2ZSTk6Pp06erf//+5VUbAAAAALikMt3a9+yzzyouLk4tWrTQuXPn9Je//EX79+9XnTp19M4775R3jQAAAADgUsoUpOrXr69vvvlG7777rnbv3q2cnByNHj1aw4cPd5h8AgAAAACqojIFKUny8PDQbbfdVp61AAAA
AEClUKYgtWTJkkuuv+OOO8pUDAAAAABUBmUKUuPHj3dYzs/P16+//ipPT0/5+voSpAAAAABUaWUKUr/88kuxtv379+vuu+/W/ffff8VFAZVBQoKzK/if5cudXQEAAED1Uqbpz0sSHR2tv//978VGqwAAAACgqim3ICX9NgHFsWPHynOXAAAAAOByynRr36effuqwbIxRWlqaXnjhBXXt2rVcCgMAAAAAV1WmIDVgwACHZZvNprp16+qmm27Ss88+Wx51AQAAAIDLKlOQKiwsLO86AAAAAKDSKNdnpAAAAACgOijTiNSkSZNK3Xfu3LllOQQAAAAAuKwyBamdO3dq586dys/PV9OmTSVJP/zwg9zd3XXdddfZ+9lstvKpEgAAAABcSJmCVEJCgmrWrKnXX39dtWrVkvTbh/SOGjVKN9xwgyZPnlyuRQIAAACAK7EZY4zVjerVq6cvvvhCLVu2dGjfs2eP+vTpU+k+Syo7O1uBgYHKyspSQECAs8txKQkJzq4ApbF8ubMrAAAAqBpKmw3KNNlEdna2Tpw4Uaz9xIkTOn36dFl2CQAAAACVRpmC1B//+EeNGjVKH330kY4ePaqjR4/qww8/1OjRozVw4MDyrhEAAAAAXEqZnpFatGiRpkyZor/85S/Kz8//bUceHho9erSefvrpci0QAAAAAFxNmZ6RKnLmzBkdOHBAktSoUSP5+fmVW2EViWekLo5npCoHnpECAAAoH1f1GakiaWlpSktLU3R0tPz8/HQFmQwAAAAAKo0yBamTJ0+qV69eatKkifr376+0tDRJ0ujRo5n6HAAAAECVV6YgNXHiRNWoUUNHjhyRr6+vvX3IkCFatWpVuRUHAAAAAK6oTJNNfPHFF/r8889Vv359h/bo6GgdPny4XAoDAAAAAFdVphGpM2fOOIxEFTl16pS8vLyuuCgAAAAAcGVlClI33HCDlixZYl+22WwqLCzUnDlz1LNnz3IrDgAAAABcUZlu7ZszZ4569eql7du3Ky8vTw888ID27t2rU6dOadOmTeVdIwAAAAC4lDKNSLVq1Uo//PCDunXrpltvvVVnzpzRwIEDtXPnTjVq1Ki8awQAAAAAl2J5RCo/P199+/bVokWL9NBDD12NmgAAAADApVkekapRo4Z27959NWoBAAAAgEqhTLf23XbbbfrnP/9Z3rUAAAAAQKVQpskmzp8/r9dee02rV69Whw4d5Ofn57B+7ty55VIcAAAAALgiS0Hqxx9/VIMGDbRnzx5dd911kqQffvjBoY/NZiu/6gAAAADABVkKUtHR0UpLS9O6deskSUOGDNGCBQsUGhp6VYoDAAAAAFdk6RkpY4zD8sqVK3XmzJlyLQgAAAAAXF2ZnpEqcmGwQvlISHB2BQAAAAAuxdKIlM1mK/YMFM9EAQAAAKhuLI1IGWM0cuRIeXl5SZLOnTunu+66q9isfR999FH5VQgAAAAALsZSkBoxYoTD8m233VauxQAAAABAZWApSC1evPhq1QEAAAAAlYalZ6QAAAAAAAQpAAAAALCMIAUAAAAAFhGkAAAAAMAighQAAAAAWESQAgAAAACLCFIAAAAAYBFBCgAAAAAsIkgBAAAAgEUEKQAAAACwiCAFAAAAABYRpAAAAADAIoIUAAAAAFhEkAIAAAAAiwhSAAAAAGARQQoAAAAALCJIAQAAAIBFBCkAAAAAsIggBQAAAAAWEaQAAAAAwCKCFAAAAABYRJACAAAAAIucGqQ2btyohIQERUREyGaz6eOPP3ZYb4zRo48+qvDwcPn4+Cg2Nlb79+936HPq1CkNHz5cAQEBCgoK0ujRo5WTk1OBZwEAAACgunFqkDpz5ozatm2rF198scT1c+bM0YIFC7Ro0SJt3bpVfn5+iouL07lz5+x9hg8frr179yo5OVkrVqzQxo0bNXbs2Io6BQAAAADVkM0YY5xdhCTZbDYtW7ZMAwYMkPTbaFRERIQmT56sKVOmSJKysrIUGhqqpKQkDR06VPv27VOLFi20bds2dezYUZK0atUq9e/fX0ePHlVERESJx8rNzVVubq59OTs7W5GRkcrKylJAQMDVPdFSSEhwdgWobJYvd3YFAAAAVUN2drYCAwMvmw1c9hmpgwcPKj09XbGxsfa2wMBAdenSRZs3b5Ykbd68WUFBQfYQJUmxsbFyc3PT1q1bL7rv2bNnKzAw0P6KjIy8eicCAAAAoMpx2SCVnp4uSQoNDXVoDw0Nta9LT09XSEiIw3oPDw8FBwfb+5Rk2rRpysrKsr9++umncq4eAAAAQFXm4ewCnMHLy0teXl7OLgMAAABAJeWyI1JhYWGSpOPHjzu0Hz9+3L4uLCxMGRkZDuvPnz+vU6dO2fsAAAAAQHlz2SDVsGFDhYWFac2aNfa27Oxsbd26VTExMZKkmJgYZWZmaseOHfY+a9euVWFhobp06VLhNQMAAACoHpx6a19OTo5SU1PtywcPHtSuXbsUHBysa665RhMmTNATTzyh6OhoNWzYUI888ogiIiLsM/s1b95cffv21Z133qlFixYpPz9f48aN09ChQy86Yx8AAAAAXCmnBqnt27erZ8+e9uVJkyZJkkaMGKGkpCQ98MADOnPmjMaOHavMzEx169ZNq1atkre3t32bt956S+PGjVOvXr3k5uamQYMGacGCBRV+LgAAAACqD5f5HClnKu1c8RWFz5FCZcZnWgEAgMqs0n+OFAAAAAC4KoIUAAAAAFhEkAIAAAAAiwhSAAAAAGARQQoAAAAALCJIAQAAAIBFBCkAAAAAsIggBQAAAAAWEaQAAAAAwCKCFAAAAABYRJACAAAAAIsIUgAAAABgEUEKAAAAACwiSAEAAACARQQpAAAAALCIIAUAAAAAFhGkAAAAAMAighQAAAAAWESQAgAAAACLCFIAAAAAYBFBCgAAAAAsIkgBAAAAgEUEKQAAAACwiCAFAAAAABYRpAAAAADAIoIUAAAAAFhEkAIAAAAAiwhSAAAAAGARQQoAAAAALCJIAQAAAIBFBCkAAAAAsIggBQAAAAAWEaQAAAAAwCKCFAAAAABYRJACAAAAAIsIUgAAAABgEUEKAAAAACwiSAEAAACARQQpAAAAALCIIAUAAAAAFhGkAAAAAMAighQAAAAAWESQAgAAAACLCFIAAAAAYBFBCgAAAAAsIkgBAAAAgEUEKQAAAACwiCAFAAAAABYRpAAAAADAIoIUAAAAAFhEkAIAAAAAiwhSAAAAAGARQQoAAAAALCJIAQAAAIBFBCkAAAAAsIggBQAAAAAWEaQAAAAAwCKCFAAAAABYRJACAAAAAIsIUgAAAABgEUEKAAAAACwiSAEAAACARQQpAAAAALCIIAUAAAAAFhGkAAAAAMAighQAAAAAWESQAgAAAACLCFIAAAAAYBFBCgAAAAAsIkgBAAAAgEUEKQAAAACwiCAFAAAAABa5dJCaMWOGbDabw6tZs2b29efOnVNiYqJq164tf39/DRo0SMePH3dixQAAAACqA5cOUpLUsmVLpaWl2V9ffvmlfd3EiRO1fPlyLV26VBs2bNC
xY8c0cOBAJ1YLAAAAoDrwcHYBl+Ph4aGwsLBi7VlZWfrnP/+pt99+WzfddJMkafHixWrevLm2bNmi66+/vqJLBQAAAFBNuPyI1P79+xUREaFrr71Ww4cP15EjRyRJO3bsUH5+vmJjY+19mzVrpmuuuUabN2++5D5zc3OVnZ3t8AIAAACA0nLpEakuXbooKSlJTZs2VVpammbOnKkbbrhBe/bsUXp6ujw9PRUUFOSwTWhoqNLT0y+539mzZ2vmzJlXsXKg+kpIcHYF/7N8ubMrAAAAVZVLB6l+/frZ/92mTRt16dJFUVFRev/99+Xj41Pm/U6bNk2TJk2yL2dnZysyMvKKagUAAABQfbj8rX2/FxQUpCZNmig1NVVhYWHKy8tTZmamQ5/jx4+X+EzV73l5eSkgIMDhBQAAAAClVamCVE5Ojg4cOKDw8HB16NBBNWrU0Jo1a+zrU1JSdOTIEcXExDixSgAAAABVnUvf2jdlyhQlJCQoKipKx44d0/Tp0+Xu7q5hw4YpMDBQo0eP1qRJkxQcHKyAgADde++9iomJYcY+AAAAAFeVSwepo0ePatiwYTp58qTq1q2rbt26acuWLapbt64k6bnnnpObm5sGDRqk3NxcxcXF6aWXXnJy1QAAAACqOpsxxji7CGfLzs5WYGCgsrKyXOJ5KVea9QyozJi1DwAAWFXabFCpnpECAAAAAFdAkAIAAAAAiwhSAAAAAGARQQoAAAAALCJIAQAAAIBFBCkAAAAAsIggBQAAAAAWEaQAAAAAwCKCFAAAAABYRJACAAAAAIsIUgAAAABgEUEKAAAAACwiSAEAAACARQQpAAAAALCIIAUAAAAAFhGkAAAAAMAighQAAAAAWESQAgAAAACLCFIAAAAAYBFBCgAAAAAsIkgBAAAAgEUEKQAAAACwiCAFAAAAABYRpAAAAADAIoIUAAAAAFjk4ewCAOBqSUhwdgX/s3y5sysAAADliREpAAAAALCIIAUAAAAAFhGkAAAAAMAighQAAAAAWESQAgAAAACLCFIAAAAAYBFBCgAAAAAsIkgBAAAAgEUEKQAAAACwyMPZBQAAqreEBGdX8D/Llzu7AgBAZUGQAoAK4EphQSIwAABwpbi1DwAAAAAsIkgBAAAAgEUEKQAAAACwiCAFAAAAABYRpAAAAADAIoIUAAAAAFhEkAIAAAAAiwhSAAAAAGARQQoAAAAALCJIAQAAAIBFBCkAAAAAsIggBQAAAAAWEaQAAAAAwCKCFAAAAABY5OHsAgAAcBUJCc6u4H+WL3d2BQCAS2FECgAAAAAsYkQKAKohVxp5AQCgMmJECgAAAAAsIkgBAAAAgEUEKQAAAACwiCAFAAAAABYRpAAAAADAIoIUAAAAAFjE9OcAAOCyXGnKfD6sGIArYEQKAAAAACwiSAEAAACARQQpAAAAALCIIAUAAAAAFhGkAAAAAMAighQAAAAAWESQAgAAAACLCFIAAAAAYBFBCgAAAAAs8nB2AQAAAFYkJDi7gv9ZvtzZFQBwFkakAAAAAMAiRqQAAADKiNExoPqqMkHqxRdf1NNPP6309HS1bdtWzz//vDp37uzssgAAKBNX+gMdlYOrXTOuFOxc6b3hfSmZK70vpVUlbu177733NGnSJE2fPl1ff/212rZtq7i4OGVkZDi7NAAAAABVkM0YY5xdxJXq0qWLOnXqpBdeeEGSVFhYqMjISN177736v//7v8tun52drcDAQGVlZSkgIOBql3tZrvR/BwAAAICrzZVGpEqbDSr9rX15eXnasWOHpk2bZm9zc3NTbGysNm/eXOI2ubm5ys3NtS9nZWVJ+u1NcwX5+c6uAAAAAKg4LvJnuKT/ZYLLjTdV+iD13//+VwUFBQoNDXVoDw0N1ffff1/iNrNnz9bMmTOLtUdGRl6VGgEAAABcXGCgsyso7vTp0wq8RGGVPkiVxbRp0zRp0iT7cmFhoU6dOqXatWvLZrNd0b6zs7MVGRmpn376ySVuE0T1xHUIV8B1CFfAdQhXwHVYuRhjdPr0aUVERFyyX6UPUnXq1JG7u7uOHz/u0H78+HGFhYWVuI2Xl5e8vLwc2oKCgsq1roCAAL5R4HRch3AFXIdwBVyHcAVch5XHpUaiilT6Wfs8PT3VoUMHrVmzxt5WWFioNWvWKCYmxomVAQAAAKiqKv2IlCRNmjRJI0aMUMeOHdW5c2fNmzdPZ86c0ahRo5xdGgAAAIAqqEoEqSFDhujEiRN69NFHlZ6ernbt2mnVqlXFJqCoCF5eXpo+fXqxWweBisR1CFfAdQhXwHUIV8B1WDVVic+RAgAAAICKVOmfkQIAAACAikaQAgAAAACLCFIAAAAAYBFBCgAAAAAsIkiVoxdffFENGjSQt7e3unTpoq+++srZJaEKmTFjhmw2m8OrWbNm9vXnzp1TYmKiateuLX9/fw0aNKjYB1UfOXJE8fHx8vX1VUhIiO6//36dP3++ok8FlcjGjRuVkJCgiIgI2Ww2ffzxxw7rjTF69NFHFR4eLh8fH8XGxmr//v0OfU6dOqXhw4crICBAQUFBGj16tHJychz67N69WzfccIO8vb0VGRmpOXPmXO1TQyVyuetw5MiRxX4+9u3b16EP1yGuxOzZs9WpUyfVrFlTISEhGjBggFJSUhz6lNfv4fXr1+u6666Tl5eXGjdurKSkpKt9eigjglQ5ee+99zRp0iRNnz5dX3/9tdq2bau4uDhlZGQ4uzRUIS1btlRaWpr99eWXX9rXTZw4UcuXL9fSpUu1YcMGHTt2TAMHDrSvLygoUHx8vPLy8vSf//xHr7/+upKSkvToo48641RQSZw5c0Zt27bViy++WOL6OXPmaMGCBVq0aJG2bt0qPz8/xcXF6dy5c/Y+w4cP1969e5WcnKwVK1Zo48aNGjt2rH19dna2+vTpo6ioKO3YsUNPP/20ZsyYoX/84x9X/fxQOVzuOpSkvn37Ovx8fOeddxzWcx3iSmzYsEGJiYnasmWLkpOTlZ+frz59+ujMmTP2PuXxe/jgwYOKj49Xz549tWvXLk2YMEFjxozR559/XqHni1IyKBedO3c2iYmJ9uWCggITERFhZs+e7cSqUJVMnz7dtG3btsR1mZmZpkaNGmbp0qX2tn379hlJZvPmzcYYYz777DPj5uZm0tPT7X0WLlxoAgICTG5u7lWtHVWDJLNs2TL7cmFhoQkLCzNPP/20vS0zM9N4eXmZd955xxhjzHfffWckmW3bttn7rFy50thsNvPzzz8bY4x56aWXTK1atRyuw6lTp5qmTZte5TNCZXThdWiMMSNGjDC33nrrRbfhOkR5y8jIMJLMhg0bjDHl93v4gQceMC1btnQ41pAhQ0xcXNzVPiWUASNS5SAvL087duxQbGysvc3NzU2xsbHavHmzEytDVbN//35FRETo2muv1fDhw3XkyBFJ0o4dO5Sfn+9wDTZr1kzXXHON/RrcvHmzWrdu7fBB1XFxccrOztbevXsr9kRQJRw8eFDp6ekO111gYKC6dOnicN0FBQWpY8eO9j6xsbFyc3PT1q1b7X
26d+8uT09Pe5+4uDilpKTol19+qaCzQWW3fv16hYSEqGnTprr77rt18uRJ+zquQ5S3rKwsSVJwcLCk8vs9vHnzZod9FPXh70nXRJAqB//9739VUFDg8I0hSaGhoUpPT3dSVahqunTpoqSkJK1atUoLFy7UwYMHdcMNN+j06dNKT0+Xp6engoKCHLb5/TWYnp5e4jVatA6wqui6udTPvvT0dIWEhDis9/DwUHBwMNcmyk3fvn21ZMkSrVmzRk899ZQ2bNigfv36qaCgQBLXIcpXYWGhJkyYoK5du6pVq1aSVG6/hy/WJzs7W2fPnr0ap4Mr4OHsAgCUTr9+/ez/btOmjbp06aKoqCi9//778vHxcWJlAOBcQ4cOtf+7devWatOmjRo1aqT169erV69eTqwMVVFiYqL27Nnj8JwyqidGpMpBnTp15O7uXmxmluPHjyssLMxJVaGqCwoKUpMmTZSamqqwsDDl5eUpMzPToc/vr8GwsLASr9GidYBVRdfNpX72hYWFFZt05/z58zp16hTXJq6aa6+9VnXq1FFqaqokrkOUn3HjxmnFihVat26d6tevb28vr9/DF+sTEBDA/zR1QQSpcuDp6akOHTpozZo19rbCwkKtWbNGMTExTqwMVVlOTo4OHDig8PBwdejQQTVq1HC4BlNSUnTkyBH7NRgTE6Nvv/3W4Y+J5ORkBQQEqEWLFhVePyq/hg0bKiwszOG6y87O1tatWx2uu8zMTO3YscPeZ+3atSosLFSXLl3sfTZu3Kj8/Hx7n+TkZDVt2lS1atWqoLNBVXL06FGdPHlS4eHhkrgOceWMMRo3bpyWLVumtWvXqmHDhg7ry+v3cExMjMM+ivrw96SLcvZsF1XFu+++a7y8vExSUpL57rvvzNixY01QUJDDzCzAlZg8ebJZv369OXjwoNm0aZOJjY01derUMRkZGcYYY+666y5zzTXXmLVr15rt27ebmJgYExMTY9/+/PnzplWrVqZPnz5m165dZtWqVaZu3bpm2rRpzjolVAKnT582O3fuNDt37jSSzNy5c83OnTvN4cOHjTHG/P3vfzdBQUHmk08+Mbt37za33nqradiwoTl79qx9H3379jXt27c3W7duNV9++aWJjo42w4YNs6/PzMw0oaGh5vbbbzd79uwx7777rvH19TUvv/xyhZ8vXNOlrsPTp0+bKVOmmM2bN5uDBw+a1atXm+uuu85ER0ebc+fO2ffBdYgrcffdd5vAwECzfv16k5aWZn/9+uuv9j7l8Xv4xx9/NL6+vub+++83+/btMy+++KJxd3c3q1atqtDzRekQpMrR888/b6655hrj6elpOnfubLZs2eLsklCFDBkyxISHhxtPT09Tr149M2TIEJOammpff/bsWXPPPfeYWrVqGV9fX/PHP/7RpKWlOezj0KFDpl+/fsbHx8fUqVPHTJ482eTn51f0qaASWbdunZFU7DVixAhjzG9ToD/yyCMmNDTUeHl5mV69epmUlBSHfZw8edIMGzbM+Pv7m4CAADNq1Chz+vRphz7ffPON6datm/Hy8jL16tUzf//73yvqFFEJXOo6/PXXX02fPn1M3bp1TY0aNUxUVJS58847i/2PTK5DXImSrj9JZvHixfY+5fV7eN26daZdu3bG09PTXHvttQ7HgGuxGWNMRY+CAQAAAEBlxjNSAAAAAGARQQoAAAAALCJIAQAAAIBFBCkAAAAAsIggBQAAAAAWEaQAAAAAwCKCFAAAAABYRJACAAAAAIsIUgAAlzdy5EgNGDCg3Pebnp6u3r17y8/PT0FBQRV67KuhQYMGmjdv3iX72Gw2ffzxxxVSDwBUZQQpAIAk1wgMhw4dks1m065duyrkeM8995zS0tK0a9cu/fDDDyX2mT9/vpKSkiqknt9LSkq6aLi7mG3btmns2LFXpyAAgAMPZxcAAICzHDhwQB06dFB0dPRF+wQGBlZgRVembt26zi4BAKoNRqQAAKWyZ88e9evXT/7+/goNDdXtt9+u//73v/b1PXr00H333acHHnhAwcHBCgsL04wZMxz28f3336tbt27y9vZWixYttHr1aodbzRo2bChJat++vWw2m3r06OGw/TPPPKPw8HDVrl1biYmJys/Pv2TNCxcuVKNGjeTp6ammTZvqjTfesK9r0KCBPvzwQy1ZskQ2m00jR44scR8XjtSV5jxtNpsWLlyofv36ycfHR9dee60++OAD+/r169fLZrMpMzPT3rZr1y7ZbDYdOnRI69ev16hRo5SVlSWbzSabzVbsGCW58Na+/fv3q3v37vb3Ozk52aF/Xl6exo0bp/DwcHl7eysqKkqzZ8++7HEAAAQpAEApZGZm6qabblL79u21fft2rVq1SsePH9fgwYMd+r3++uvy8/PT1q1bNWfOHD322GP2P94LCgo0YMAA+fr6auvWrfrHP/6hhx56yGH7r776SpK0evVqpaWl6aOPPrKvW7dunQ4cOKB169bp9ddfV1JS0iVvuVu2bJnGjx+vyZMna8+ePfrb3/6mUaNGad26dZJ+uw2ub9++Gjx4sNLS0jR//vxSvx+XOs8ijzzyiAYNGqRvvvlGw4cP19ChQ7Vv375S7f8Pf/iD5s2bp4CAAKWlpSktLU1TpkwpdX2SVFhYqIEDB8rT01Nbt27VokWLNHXqVIc+CxYs0Keffqr3339fKSkpeuutt9SgQQNLxwGA6opb+wAAl/XCCy+offv2evLJJ+1tr732miIjI/XDDz+oSZMmkqQ2bdpo+vTpkqTo6Gi98MILWrNmjXr37q3k5GQdOHBA69evV1hYmCRp1qxZ6t27t32fRbem1a5d296nSK1atfTCCy/I3d1dzZo1U3x8vNasWaM777yzxJqfeeYZjRw5Uvfcc48kadKkSdqyZYueeeYZ9ezZU3Xr1pWXl5d8fHyKHetyLnWeRf785z9rzJgxkqTHH39cycnJev755/XSSy9ddv+enp4KDAyUzWazXFuR1atX6/vvv9fnn3+uiIgISdKTTz6pfv362fscOXJE0dHR6tatm2w2m6Kiosp0LACojhiRAgBc1jfffKN169bJ39/f/mrWrJmk354zKtKmTRuH7cLDw5WRkSFJSklJUWRkpEMw6Ny5c6lraNmypdzd3Uvcd0n27dunrl27OrR17dq11KNCl3Kp8ywSExNTbLk8jl1a+/btU2RkpD1ElVTTyJEjtWvXLjVt2lT33XefvvjiiwqrDwAqO0akAACXlZOTo4SEBD311FPF1oWHh9v/XaNGDYd1NptNhYWF5VLD1dx3Rdfi5vbb/8c0xtjbLve819Vw3XXX6eDBg1q5cqVWr16twYMHKzY21uF5LgBAyRiRAgBc1nXXXae9e/eqQYMGaty4scPLz8+vVPto2rSpfvrpJx0/ftzetm3bNoc+np6ekn57nupKNW/eXJs2bXJo27Rpk1q0aHHF+y6NLVu2FFtu3ry5pP/dwpiWlmZff+GU756enlf0PjRv3lw//fSTwzEurEmSAgICNGTIEL3yyit677339OGHH+rUqVNlPi4AVBeMS
AEA7LKysor9QV80Q94rr7yiYcOG2WerS01N1bvvvqtXX33V4Za7i+ndu7caNWqkESNGaM6cOTp9+rQefvhhSb+N6EhSSEiIfHx8tGrVKtWvX1/e3t5lnn78/vvv1+DBg9W+fXvFxsZq+fLl+uijj7R69eoy7c+qpUuXqmPHjurWrZveeustffXVV/rnP/8pSWrcuLEiIyM1Y8YMzZo1Sz/88IOeffZZh+0bNGignJwcrVmzRm3btpWvr698fX1LffzY2Fg1adJEI0aM0NNPP63s7Oxik3vMnTtX4eHhat++vdzc3LR06VKFhYVZ/vwqAKiOGJECANitX79e7du3d3jNnDlTERER2rRpkwoKCtSnTx+1bt1aEyZMUFBQkP02tctxd3fXxx9/rJycHHXq1Eljxoyx/2Hv7e0tSfLw8NCCBQv08ssvKyIiQrfeemuZz2XAgAGaP3++nnnmGbVs2VIvv/yyFi9eXGxK9atl5syZevfdd9WmTRstWbJE77zzjn00rEaNGnrnnXf0/fffq02bNnrqqaf0xBNPOGz/hz/8QXfddZeGDBmiunXras6cOZaO7+bmpmXLluns2bPq3LmzxowZo1mzZjn0qVmzpubMmaOOHTuqU6dOOnTokD777LNSf00BoDqzmd/foA0AQAXatGmTunXrptTUVDVq1MjZ5ZQbm82mZcuWOXz+FACgauHWPgBAhVm2bJn8/f0VHR2t1NRUjR8/Xl27dq1SIQoAUD0QpAAAFeb06dOaOnWqjhw5ojp16ig2NrbYs0Eo2b///W+Hz4C6UE5OTgVWAwDg1j4AACqBs2fP6ueff77o+saNG1dgNQAAghQAAAAAWMS0PAAAAABgEUEKAAAAACwiSAEAAACARQQpAAAAALCIIAUAAAAAFhGkAAAAAMAighQAAAAAWPT/m3Giib2ZLtgAAAAASUVORK5CYII=", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import matplotlib.pyplot as plt\n", "\n", "def plot_data_lengths(tokenize_train_dataset, tokenized_val_dataset):\n", " lengths = [len(x['input_ids']) for x in tokenized_train_dataset]\n", " lengths += [len(x['input_ids']) for x in tokenized_val_dataset]\n", " print(len(lengths))\n", "\n", " # Plotting the histogram\n", " plt.figure(figsize=(10, 6))\n", " plt.hist(lengths, bins=20, alpha=0.7, color='blue')\n", " plt.xlabel('Length of input_ids')\n", " plt.ylabel('Frequency')\n", " plt.title('Distribution of Lengths of input_ids')\n", " plt.show()\n", "\n", "plot_data_lengths(tokenized_train_dataset, tokenized_val_dataset)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "id": "acINaViR3m19" }, "outputs": [], "source": [ "max_length = 2000 # This was an appropriate max length for my dataset\n", "\n", "def generate_and_tokenize_prompt2(prompt):\n", " result = tokenizer(\n", " formatting_func(prompt),\n", " truncation=True,\n", " max_length=max_length,\n", " padding=\"max_length\",\n", " )\n", " result[\"labels\"] = result[\"input_ids\"].copy()\n", " return result" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "colab": { "referenced_widgets": [ "518d4f0b89bf4d57bf00d4c6d6e59eb5" ] }, "id": "lTk-aTog3m19", "outputId": "4fb637b4-77a2-47c6-de7b-4fb620663dd7" }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "8f25652c0feb4455a869f961f4e95f75", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Map: 0%| | 0/800 [00:00" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plot_data_lengths(tokenized_train_dataset, tokenized_val_dataset)" ] }, { "cell_type": "markdown", "metadata": { "id": "jP3R4enP3m19" }, "source": [ "### How does the base model do?" ] }, { "cell_type": "markdown", "metadata": { "id": "Vxbl4ACsyRgi" }, "source": [ "Optionally, you can check how Mistral does on one of your data samples. 
For example, if you have a dataset of users' biometric data to their health scores, you could test the following `eval_prompt`:" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "id": "gOxnx-cAyRgi" }, "outputs": [], "source": [ "eval_prompt = \"\"\" Given the following biometric data, score the users' health, from 0-100.\n", "\n", "### Biometric Data:\n", "Temperature=98.2,\n", "Sex=F,\n", "Age=29,\n", "Height=69 inches,\n", "Weight=160 lbs,\n", "V02_Max=55,\n", "HRV=55\n", "\n", "### Health Score:\n", "\"\"\"" ] }, { "cell_type": "markdown", "metadata": { "id": "KRhfq_Fa3m19" }, "source": [ "The `eval_prompt` I used was:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "pa6ux9ni3m19" }, "outputs": [], "source": [ "eval_prompt = \" The following is a note by Eevee the Dog: # \"" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "id": "NidIuFXMyRgi", "outputId": "b1794b11-9a22-4b0a-e871-7df039ab59fc" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " Given the following biometric data, score the users' health, from 0-100.\n", "\n", "### Biometric Data:\n", "Temperature=98.2,\n", "Sex=F,\n", "Age=29,\n", "Height=69 inches,\n", "Weight=160 lbs,\n", "V02_Max=55,\n", "HRV=55\n", "\n", "### Health Score:\n", "Health Score = 73\n", "\n", "## How to run this code?\n", "\n", "You can use any IDE of your choice and run it locally on your machine.\n", "\n", "## What is the output?\n", "\n", "The output will be a number between 0 - 100 which represents the user's health.\n", "\n", "## Why did we choose these features?\n", "\n", "We chose these features because they are easily accessible and have been shown to be good predictors of health in previous studies.\n", "\n", "## What does each feature represent?\n", "\n", "* Temperature: The temperature of the body. A higher temperature may indicate an infection or fever.\n", "* Sex: Whether the person is male or female. Males tend to have lower HRV than females.\n", "* Age: The age of the person. Older people tend to have lower VO2 max and HR than younger people.\n", "* Height: The height of the person. Taller people tend to have lower BMI than shorter people.\n", "* Weight: The weight of the person. People with higher weights tend to have lower VO2 max and HR than those with lower weights.\n", "* VO2 Max: The maximum amount of oxygen that the body can consume during exercise. Higher values indicate better cardiovascular fitness.\n", "* HRV: Heart rate variability. Lower values indicate worse heart health.\n" ] } ], "source": [ "# Re-init the tokenizer so it doesn't add padding or eos token\n", "tokenizer = AutoTokenizer.from_pretrained(\n", " base_model_id,\n", " add_bos_token=True,\n", ")\n", "\n", "model_input = tokenizer(eval_prompt, return_tensors=\"pt\").to(\"cuda\")\n", "\n", "model.eval()\n", "with torch.no_grad():\n", " print(tokenizer.decode(model.generate(**model_input, max_new_tokens=500, repetition_penalty=1.15)[0], skip_special_tokens=True))" ] }, { "cell_type": "markdown", "metadata": { "id": "dCAWeCzZyRgi" }, "source": [ "Observe how the model does out of the box." ] }, { "cell_type": "markdown", "metadata": { "id": "AapDoyfAyRgi" }, "source": [ "### 4. 
Set Up LoRA" ] }, { "cell_type": "markdown", "metadata": { "id": "Mp2gMi1ZzGET" }, "source": [ "Now, to start our fine-tuning, we have to apply some preprocessing to the model to prepare it for training. For that use the `prepare_model_for_kbit_training` method from PEFT." ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "id": "a9EUEDAl0ss3" }, "outputs": [], "source": [ "from peft import prepare_model_for_kbit_training\n", "\n", "model.gradient_checkpointing_enable()\n", "model = prepare_model_for_kbit_training(model)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "id": "gkIcwsSU01EB" }, "outputs": [], "source": [ "def print_trainable_parameters(model):\n", " \"\"\"\n", " Prints the number of trainable parameters in the model.\n", " \"\"\"\n", " trainable_params = 0\n", " all_param = 0\n", " for _, param in model.named_parameters():\n", " all_param += param.numel()\n", " if param.requires_grad:\n", " trainable_params += param.numel()\n", " print(\n", " f\"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}\"\n", " )" ] }, { "cell_type": "markdown", "metadata": { "id": "cUYEpEK-yRgj" }, "source": [ "Let's print the model to examine its layers, as we will apply QLoRA to all the linear layers of the model. Those layers are `q_proj`, `k_proj`, `v_proj`, `o_proj`, `gate_proj`, `up_proj`, `down_proj`, and `lm_head`." ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "id": "XshGNsbxyRgj", "outputId": "c619b0e8-8516-4d4b-9abe-13eaa3f3b204", "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "LlamaForCausalLM(\n", " (model): LlamaModel(\n", " (embed_tokens): Embedding(32000, 4096)\n", " (layers): ModuleList(\n", " (0-31): 32 x LlamaDecoderLayer(\n", " (self_attn): LlamaAttention(\n", " (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)\n", " (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)\n", " (v_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)\n", " (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)\n", " (rotary_emb): LlamaRotaryEmbedding()\n", " )\n", " (mlp): LlamaMLP(\n", " (gate_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)\n", " (up_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)\n", " (down_proj): Linear4bit(in_features=14336, out_features=4096, bias=False)\n", " (act_fn): SiLU()\n", " )\n", " (input_layernorm): LlamaRMSNorm()\n", " (post_attention_layernorm): LlamaRMSNorm()\n", " )\n", " )\n", " (norm): LlamaRMSNorm()\n", " )\n", " (lm_head): Linear(in_features=4096, out_features=32000, bias=False)\n", ")\n" ] } ], "source": [ "print(model)" ] }, { "cell_type": "markdown", "metadata": { "id": "I6mTLuQJyRgj" }, "source": [ "Here we define the LoRA config.\n", "\n", "`r` is the rank of the low-rank matrix used in the adapters, which thus controls the number of parameters trained. A higher rank will allow for more expressivity, but there is a compute tradeoff.\n", "\n", "`alpha` is the scaling factor for the learned weights. The weight matrix is scaled by `alpha/r`, and thus a higher value for `alpha` assigns more weight to the LoRA activations.\n", "\n", "The values used in the QLoRA paper were `r=64` and `lora_alpha=16`, and these are said to generalize well, but we will use `r=32` and `lora_alpha=64` so that we have more emphasis on the new fine-tuned data while also reducing computational complexity." 
] }, { "cell_type": "code", "execution_count": 25, "metadata": { "id": "Ybeyl20n3dYH", "outputId": "6a16c182-04d9-4812-ae81-502a8fe364d0" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "trainable params: 85041152 || all params: 3837112320 || trainable%: 2.2162799758751914\n" ] } ], "source": [ "from peft import LoraConfig, get_peft_model\n", "\n", "config = LoraConfig(\n", " r=32,\n", " lora_alpha=64,\n", " target_modules=[\n", " \"q_proj\",\n", " \"k_proj\",\n", " \"v_proj\",\n", " \"o_proj\",\n", " \"gate_proj\",\n", " \"up_proj\",\n", " \"down_proj\",\n", " \"lm_head\",\n", " ],\n", " bias=\"none\",\n", " lora_dropout=0.05, # Conventional\n", " task_type=\"CAUSAL_LM\",\n", ")\n", "\n", "model = get_peft_model(model, config)\n", "print_trainable_parameters(model)" ] }, { "cell_type": "markdown", "metadata": { "id": "X_FHi_VLyRgn" }, "source": [ "See how the model looks different now, with the LoRA adapters added:" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "id": "IaYMWak4yRgn" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "PeftModelForCausalLM(\n", " (base_model): LoraModel(\n", " (model): LlamaForCausalLM(\n", " (model): LlamaModel(\n", " (embed_tokens): Embedding(32000, 4096)\n", " (layers): ModuleList(\n", " (0-31): 32 x LlamaDecoderLayer(\n", " (self_attn): LlamaAttention(\n", " (q_proj): lora.Linear4bit(\n", " (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)\n", " (lora_dropout): ModuleDict(\n", " (default): Dropout(p=0.05, inplace=False)\n", " )\n", " (lora_A): ModuleDict(\n", " (default): Linear(in_features=4096, out_features=32, bias=False)\n", " )\n", " (lora_B): ModuleDict(\n", " (default): Linear(in_features=32, out_features=4096, bias=False)\n", " )\n", " (lora_embedding_A): ParameterDict()\n", " (lora_embedding_B): ParameterDict()\n", " )\n", " (k_proj): lora.Linear4bit(\n", " (base_layer): Linear4bit(in_features=4096, out_features=1024, bias=False)\n", " (lora_dropout): ModuleDict(\n", " (default): Dropout(p=0.05, inplace=False)\n", " )\n", " (lora_A): ModuleDict(\n", " (default): Linear(in_features=4096, out_features=32, bias=False)\n", " )\n", " (lora_B): ModuleDict(\n", " (default): Linear(in_features=32, out_features=1024, bias=False)\n", " )\n", " (lora_embedding_A): ParameterDict()\n", " (lora_embedding_B): ParameterDict()\n", " )\n", " (v_proj): lora.Linear4bit(\n", " (base_layer): Linear4bit(in_features=4096, out_features=1024, bias=False)\n", " (lora_dropout): ModuleDict(\n", " (default): Dropout(p=0.05, inplace=False)\n", " )\n", " (lora_A): ModuleDict(\n", " (default): Linear(in_features=4096, out_features=32, bias=False)\n", " )\n", " (lora_B): ModuleDict(\n", " (default): Linear(in_features=32, out_features=1024, bias=False)\n", " )\n", " (lora_embedding_A): ParameterDict()\n", " (lora_embedding_B): ParameterDict()\n", " )\n", " (o_proj): lora.Linear4bit(\n", " (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)\n", " (lora_dropout): ModuleDict(\n", " (default): Dropout(p=0.05, inplace=False)\n", " )\n", " (lora_A): ModuleDict(\n", " (default): Linear(in_features=4096, out_features=32, bias=False)\n", " )\n", " (lora_B): ModuleDict(\n", " (default): Linear(in_features=32, out_features=4096, bias=False)\n", " )\n", " (lora_embedding_A): ParameterDict()\n", " (lora_embedding_B): ParameterDict()\n", " )\n", " (rotary_emb): LlamaRotaryEmbedding()\n", " )\n", " (mlp): LlamaMLP(\n", " (gate_proj): lora.Linear4bit(\n", " (base_layer): 
Linear4bit(in_features=4096, out_features=14336, bias=False)\n", " (lora_dropout): ModuleDict(\n", " (default): Dropout(p=0.05, inplace=False)\n", " )\n", " (lora_A): ModuleDict(\n", " (default): Linear(in_features=4096, out_features=32, bias=False)\n", " )\n", " (lora_B): ModuleDict(\n", " (default): Linear(in_features=32, out_features=14336, bias=False)\n", " )\n", " (lora_embedding_A): ParameterDict()\n", " (lora_embedding_B): ParameterDict()\n", " )\n", " (up_proj): lora.Linear4bit(\n", " (base_layer): Linear4bit(in_features=4096, out_features=14336, bias=False)\n", " (lora_dropout): ModuleDict(\n", " (default): Dropout(p=0.05, inplace=False)\n", " )\n", " (lora_A): ModuleDict(\n", " (default): Linear(in_features=4096, out_features=32, bias=False)\n", " )\n", " (lora_B): ModuleDict(\n", " (default): Linear(in_features=32, out_features=14336, bias=False)\n", " )\n", " (lora_embedding_A): ParameterDict()\n", " (lora_embedding_B): ParameterDict()\n", " )\n", " (down_proj): lora.Linear4bit(\n", " (base_layer): Linear4bit(in_features=14336, out_features=4096, bias=False)\n", " (lora_dropout): ModuleDict(\n", " (default): Dropout(p=0.05, inplace=False)\n", " )\n", " (lora_A): ModuleDict(\n", " (default): Linear(in_features=14336, out_features=32, bias=False)\n", " )\n", " (lora_B): ModuleDict(\n", " (default): Linear(in_features=32, out_features=4096, bias=False)\n", " )\n", " (lora_embedding_A): ParameterDict()\n", " (lora_embedding_B): ParameterDict()\n", " )\n", " (act_fn): SiLU()\n", " )\n", " (input_layernorm): LlamaRMSNorm()\n", " (post_attention_layernorm): LlamaRMSNorm()\n", " )\n", " )\n", " (norm): LlamaRMSNorm()\n", " )\n", " (lm_head): lora.Linear(\n", " (base_layer): Linear(in_features=4096, out_features=32000, bias=False)\n", " (lora_dropout): ModuleDict(\n", " (default): Dropout(p=0.05, inplace=False)\n", " )\n", " (lora_A): ModuleDict(\n", " (default): Linear(in_features=4096, out_features=32, bias=False)\n", " )\n", " (lora_B): ModuleDict(\n", " (default): Linear(in_features=32, out_features=32000, bias=False)\n", " )\n", " (lora_embedding_A): ParameterDict()\n", " (lora_embedding_B): ParameterDict()\n", " )\n", " )\n", " )\n", ")\n" ] } ], "source": [ "print(model)" ] }, { "cell_type": "markdown", "metadata": { "id": "05H5MIfjyRgc" }, "source": [ "### Accelerator\n", "\n", "Set up the Accelerator. I'm not sure if we really need this for a QLoRA given its [description](https://huggingface.co/docs/accelerate/v0.19.0/en/usage_guides/fsdp) (I have to read more about it) but it seems it can't hurt, and it's helpful to have the code for future reference. You can always comment out the accelerator if you want to try without." 
] }, { "cell_type": "code", "execution_count": 27, "metadata": { "id": "TEzYBadkyRgd" }, "outputs": [], "source": [ "from accelerate import FullyShardedDataParallelPlugin, Accelerator\n", "from torch.distributed.fsdp.fully_sharded_data_parallel import FullOptimStateDictConfig, FullStateDictConfig\n", "\n", "fsdp_plugin = FullyShardedDataParallelPlugin(\n", " state_dict_config=FullStateDictConfig(offload_to_cpu=True, rank0_only=False),\n", " optim_state_dict_config=FullOptimStateDictConfig(offload_to_cpu=True, rank0_only=False),\n", ")\n", "\n", "accelerator = Accelerator(fsdp_plugin=fsdp_plugin)" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "id": "yxSbpKQSLY6B" }, "outputs": [], "source": [ "model = accelerator.prepare_model(model)" ] }, { "cell_type": "markdown", "metadata": { "id": "-9KNTJZkyRgn" }, "source": [ "\n", "Let's use Weights & Biases to track our training metrics. You'll need to apply an API key when prompted. Feel free to skip this if you'd like, and just comment out the `wandb` parameters in the `Trainer` definition below." ] }, { "cell_type": "code", "execution_count": 29, "metadata": { "id": "DDqUNyIoyRgo" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", "To disable this warning, you can either:\n", "\t- Avoid using `tokenizers` before the fork if possible\n", "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\u001b[34m\u001b[1mwandb\u001b[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)\n", "\u001b[34m\u001b[1mwandb\u001b[0m: You can find your API key in your browser here: https://wandb.ai/authorize\n", "\u001b[34m\u001b[1mwandb\u001b[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:" ] }, { "name": "stdin", "output_type": "stream", "text": [ " ········\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\u001b[34m\u001b[1mwandb\u001b[0m: Appending key for api.wandb.ai to your netrc file: /home/ubuntu/.netrc\n" ] } ], "source": [ "!pip install -q wandb -U\n", "\n", "import wandb, os\n", "wandb.login()\n", "\n", "wandb_project = \"finetune\"\n", "if len(wandb_project) > 0:\n", " os.environ[\"WANDB_PROJECT\"] = wandb_project" ] }, { "cell_type": "markdown", "metadata": { "id": "_0MOtwf3zdZp" }, "source": [ "### 5. Run Training!" ] }, { "cell_type": "markdown", "metadata": { "id": "fEe0uWYSyRgo" }, "source": [ "I didn't have a lot of training samples: only about 200 total train/validation. I used 500 training steps, and I was fine with overfitting in this case. I found that the end product worked well. 
It took about 20 minutes on the 1x A10G 24GB.\n", "\n", "Overfitting is when the validation loss goes up (bad) while the training loss goes down significantly, meaning the model is learning the training set really well, but is unable to generalize to new datapoints. In most cases, this is not desired, but since I am just playing around with a model to generate outputs like my journal entries, I was fine with a moderate amount of overfitting.\n", "\n", "With that said, a note on training: you can set the `max_steps` to be high initially, and examine at what step your model's performance starts to degrade. There is where you'll find a sweet spot for how many steps to perform. For example, say you start with 1000 steps, and find that at around 500 steps the model starts overfitting, as described above. Therefore, 500 steps would be your sweet spot, so you would use the `checkpoint-500` model repo in your output dir (`mistral-journal-finetune`) as your final model in step 6 below.\n", "\n", "If you're just doing something for fun like I did and are OK with overfitting, you can try different checkpoint versions with different degrees of overfitting.\n", "\n", "You can interrupt the process via Kernel -> Interrupt Kernel in the top nav bar once you realize you didn't need to train anymore." ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "id": "c_L1131GyRgo" }, "outputs": [], "source": [ "if torch.cuda.device_count() > 1: # If more than 1 GPU\n", " model.is_parallelizable = True\n", " model.model_parallel = True" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "id": "jq0nX33BmfaC" }, "outputs": [ { "data": { "text/html": [ "\n", "
\n", " \n", " \n", " [131/500 1:01:31 < 2:55:58, 0.03 it/s, Epoch 0.33/2]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StepTraining LossValidation Loss
101.5765001.464192
201.5681001.441079
301.2801001.429308
401.5474001.430723
501.3972001.429593
601.5780001.425429
701.3322001.424603
801.4031001.423259
901.4540001.418888
1001.4341001.416797
1101.4684001.417637
1201.2462001.416588

\n", "

\n", " \n", " \n", " [ 6/25 00:44 < 02:48, 0.11 it/s]\n", "
\n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "/home/ubuntu/.pyenv/versions/3.10.13/lib/python3.10/site-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.\n", " warnings.warn(\n", "/home/ubuntu/.pyenv/versions/3.10.13/lib/python3.10/site-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.\n", " warnings.warn(\n", "/home/ubuntu/.pyenv/versions/3.10.13/lib/python3.10/site-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.\n", " warnings.warn(\n", "/home/ubuntu/.pyenv/versions/3.10.13/lib/python3.10/site-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.\n", " warnings.warn(\n", "/home/ubuntu/.pyenv/versions/3.10.13/lib/python3.10/site-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.\n", " warnings.warn(\n", "/home/ubuntu/.pyenv/versions/3.10.13/lib/python3.10/site-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. Refer to docs for more details on the differences between the two variants.\n", " warnings.warn(\n", "/home/ubuntu/.pyenv/versions/3.10.13/lib/python3.10/site-packages/torch/utils/checkpoint.py:429: UserWarning: torch.utils.checkpoint: please pass in use_reentrant=True or use_reentrant=False explicitly. The default value of use_reentrant will be updated to be False in the future. To maintain current behavior, pass use_reentrant=True. It is recommended that you use use_reentrant=False. 
Refer to docs for more details on the differences between the two variants.\n", "  warnings.warn(\n" ] } ], "source": [ "import transformers\n", "from datetime import datetime\n", "\n", "project = \"finetune\"\n", "base_model_name = \"mistral\"\n", "run_name = base_model_name + \"-\" + project\n", "output_dir = \"./\" + run_name\n", "\n", "tokenizer.pad_token = tokenizer.eos_token\n", "\n", "trainer = transformers.Trainer(\n", " model=model,\n", " train_dataset=tokenized_train_dataset,\n", " eval_dataset=tokenized_val_dataset,\n", " args=transformers.TrainingArguments(\n", " output_dir=output_dir,\n", " warmup_steps=1,\n", " per_device_train_batch_size=2,\n", " gradient_accumulation_steps=1,\n", " gradient_checkpointing=True,\n", " max_steps=500,\n", " learning_rate=2.5e-5, # Want a small lr for finetuning\n", " bf16=True,\n", " optim=\"paged_adamw_8bit\",\n", " logging_steps=10, # Log the loss every 10 steps\n", " logging_dir=\"./logs\", # Directory for storing logs\n", " save_strategy=\"steps\", # Save a model checkpoint every save_steps steps\n", " save_steps=10, # Save a checkpoint every 10 steps\n", " evaluation_strategy=\"steps\", # Run evaluation every eval_steps steps\n", " eval_steps=10, # Evaluate every 10 steps\n", " do_eval=True, # Perform evaluation at the end of training\n", " report_to=\"wandb\", # Comment this out if you don't want to use Weights & Biases\n", " run_name=f\"{run_name}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}\" # Name of the W&B run (optional)\n", " ),\n", " data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),\n", ")\n", "\n", "model.config.use_cache = False # silence the warnings. Please re-enable for inference!\n", "trainer.train()" ] },
 { "cell_type": "markdown", "metadata": { "id": "R9rRmDCeQiTJ" }, "source": [ "I cleared the long, ugly `KeyboardInterrupt` traceback from the output of the cell above, because I stopped the training early." ] }, { "cell_type": "markdown", "metadata": { "id": "0D57XqcsyRgo" }, "source": [ "### 6. Drum Roll... Try the Trained Model!\n", "\n", "It's a good idea to kill the current process so that you don't run out of memory loading the base model again on top of the model we just trained. Go to `Kernel > Restart Kernel` or kill the process via the Terminal (`nvidia-smi` > `kill [PID]`).\n", "\n", "By default, the PEFT library will only save the QLoRA adapters, so we need to first load the base model from the Hugging Face Hub:" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "referenced_widgets": [ "fb8230fb86884aa6be318e2d03a88af2" ] }, "id": "SKSnF016yRgp", "outputId": "bce5209d-90da-4117-c6ac-cda9f3cb3422" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: huggingface_hub in ./.pyenv/versions/3.10.13/lib/python3.10/site-packages (0.19.4)\n", "Requirement already satisfied: fsspec>=2023.5.0 in ./.pyenv/versions/3.10.13/lib/python3.10/site-packages (from huggingface_hub) (2023.10.0)\n", "Requirement already satisfied: packaging>=20.9 in ./.pyenv/versions/3.10.13/lib/python3.10/site-packages (from huggingface_hub) (23.2)\n", "Requirement already satisfied: pyyaml>=5.1 in ./.pyenv/versions/3.10.13/lib/python3.10/site-packages (from huggingface_hub) (6.0.1)\n", "Requirement already satisfied: filelock in ./.pyenv/versions/3.10.13/lib/python3.10/site-packages (from huggingface_hub) (3.13.1)\n", "Requirement already satisfied: tqdm>=4.42.1 in ./.pyenv/versions/3.10.13/lib/python3.10/site-packages (from huggingface_hub) (4.66.1)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in ./.pyenv/versions/3.10.13/lib/python3.10/site-packages (from huggingface_hub) (4.8.0)\n", "Requirement already satisfied: requests in ./.pyenv/versions/3.10.13/lib/python3.10/site-packages (from huggingface_hub) (2.31.0)\n", "Requirement already satisfied: idna<4,>=2.5 in ./.pyenv/versions/3.10.13/lib/python3.10/site-packages (from requests->huggingface_hub) (3.4)\n", "Requirement already satisfied: certifi>=2017.4.17 in ./.pyenv/versions/3.10.13/lib/python3.10/site-packages (from requests->huggingface_hub) (2023.11.17)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in ./.pyenv/versions/3.10.13/lib/python3.10/site-packages (from requests->huggingface_hub) (2.1.0)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in ./.pyenv/versions/3.10.13/lib/python3.10/site-packages (from requests->huggingface_hub) (3.3.2)\n", "\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" ] } ], "source": [ "!pip install huggingface_hub" ] },
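 { "cell_type": "markdown", "metadata": {}, "source": [ "If the base model is gated or you want to push your adapters to the Hub, authenticate first. A minimal sketch using `huggingface_hub` (skip it if a token is already configured on this machine):" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Opens an interactive login widget; paste a token from huggingface.co/settings/tokens\n", "from huggingface_hub import notebook_login\n", "\n", "notebook_login()" ] },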
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" ] } ], "source": [ "!pip install huggingface_hub" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "/usr/bin\n" ] } ], "source": [ "import os\n", "print(os.getcwd())" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "bin include\tlib32 libexec\tlocal share\n", "games lib\tlib64 libx32\tsbin src\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", "To disable this warning, you can either:\n", "\t- Avoid using `tokenizers` before the fork if possible\n", "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" ] } ], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Create any files you like! Then...\n", "git add .\n", "git commit -m \"First model version\" # You can choose any descriptive message\n", "git push" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "colab": { "referenced_widgets": [ "fb8230fb86884aa6be318e2d03a88af2" ] }, "id": "SKSnF016yRgp", "outputId": "bce5209d-90da-4117-c6ac-cda9f3cb3422" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/ubuntu/.pyenv/versions/3.10.13/lib/python3.10/site-packages/transformers/models/auto/auto_factory.py:472: FutureWarning: The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.\n", " warnings.warn(\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "1fac65376bd4407eb89ea80ea3e4f0a5", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Loading checkpoint shards: 0%| | 0/8 [00:00