{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "gpuType": "A100", "machine_shape": "hm" }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "GPU", "widgets": { "application/vnd.jupyter.widget-state+json": { "531def06b1f7430983a2e4ba33f41f7f": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_847b6b899bfc4e9b89b6ecb136a21385", "IPY_MODEL_412da2e9912f4eb0ab89d44f0bb09cec", "IPY_MODEL_1d56fddc294241f6a7cb4a300cb09afd" ], "layout": "IPY_MODEL_6f83c639357f4729873f6897119532f0" } }, "847b6b899bfc4e9b89b6ecb136a21385": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_2551b382eca04537a3a11cd70aaf574c", "placeholder": "​", "style": "IPY_MODEL_93e6cbabc77f4fd69ddc3dee9012cb8e", "value": "Loading checkpoint shards: 100%" } }, "412da2e9912f4eb0ab89d44f0bb09cec": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_da2997c847b84a32b43c377137f64b5e", "max": 4, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_24f16c1efe8547f1ab36efcccda46b59", "value": 4 } }, "1d56fddc294241f6a7cb4a300cb09afd": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_cc8cb81531344463aa881093fff8c2f0", "placeholder": "​", "style": "IPY_MODEL_f4c45b260e7a4feaaeef4c50c560641a", "value": " 4/4 [00:12<00:00,  2.77s/it]" } }, "6f83c639357f4729873f6897119532f0": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "2551b382eca04537a3a11cd70aaf574c": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "93e6cbabc77f4fd69ddc3dee9012cb8e": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "da2997c847b84a32b43c377137f64b5e": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "24f16c1efe8547f1ab36efcccda46b59": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "cc8cb81531344463aa881093fff8c2f0": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "f4c45b260e7a4feaaeef4c50c560641a": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "0272ba7f31a2441ab1cb5b8f77dbaacb": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_d1bb171ddebd4f4bbeb4ed5d4b8b7076", "IPY_MODEL_33b4fc55703746778511265e28160837", "IPY_MODEL_7548c151f8764276ad7951e2ac80d981" ], "layout": "IPY_MODEL_d972c72fef7c45998469550318661e71" } }, "d1bb171ddebd4f4bbeb4ed5d4b8b7076": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_2811b7c68a7b4c95b91bd5690cf06577", "placeholder": "​", "style": "IPY_MODEL_a33ccfdb735948e98a19d901d8091319", "value": "Loading checkpoint shards: 100%" } }, "33b4fc55703746778511265e28160837": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_c1103244cec74a299265729e630faffd", "max": 4, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_340941cfc49e4ab983b73fb48c30dfe8", "value": 4 } }, "7548c151f8764276ad7951e2ac80d981": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_8bb42aa84f4b4a9ab6417aed92132063", "placeholder": "​", "style": "IPY_MODEL_b0cf428afc21468caeb664428627aaf6", "value": " 4/4 [00:11<00:00,  2.57s/it]" } }, "d972c72fef7c45998469550318661e71": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "2811b7c68a7b4c95b91bd5690cf06577": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "a33ccfdb735948e98a19d901d8091319": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "c1103244cec74a299265729e630faffd": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "340941cfc49e4ab983b73fb48c30dfe8": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "8bb42aa84f4b4a9ab6417aed92132063": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "b0cf428afc21468caeb664428627aaf6": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } } } } }, "cells": [ { "cell_type": "code", "source": [ "!pip install -U bitsandbytes transformers peft accelerate trl datasets sentencepiece wandb\n", "!pip install flash-attn --no-build-isolation" ], "metadata": { "id": "tg1moVggj5sk", "collapsed": true }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "MODEL_NAME = \"CohereForAI/aya-23-8b\"\n", "\n", "# you may want to change the following parameters depending on your GPU configuration\n", "\n", "# free T4 instance\n", "# QUANTIZE_4BIT = True\n", "# USE_GRAD_CHECKPOINTING = True\n", "# TRAIN_BATCH_SIZE = 2\n", "# TRAIN_MAX_SEQ_LENGTH = 512\n", "# USE_FLASH_ATTENTION = False\n", "# GRAD_ACC_STEPS = 16\n", "\n", "# equivalent A100 setting\n", "QUANTIZE_4BIT = True\n", "USE_GRAD_CHECKPOINTING = True\n", "TRAIN_BATCH_SIZE = 16\n", "TRAIN_MAX_SEQ_LENGTH = 512\n", "USE_FLASH_ATTENTION = True\n", "GRAD_ACC_STEPS = 2" ], "metadata": { "id": "Izn6BYEYw4um" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig,HfArgumentParser,TrainingArguments,pipeline, logging\n", "from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model\n", "import os,torch\n", "import bitsandbytes as bnb\n", "from datasets import load_dataset\n", "from trl import SFTTrainer\n", "from datasets import Dataset\n", "import pyarrow as pa\n", "import pyarrow.dataset as ds\n", "import pandas as pd\n", "import re\n", "import wandb" ], "metadata": { "id": "wMs9uNDMHL6R" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "# Load Model\n", "quantization_config = None\n", "if QUANTIZE_4BIT:\n", " quantization_config = BitsAndBytesConfig(\n", " load_in_4bit=True,\n", " bnb_4bit_quant_type=\"nf4\",\n", " bnb_4bit_use_double_quant=True,\n", " bnb_4bit_compute_dtype=torch.bfloat16,\n", " )\n", "\n", "attn_implementation = None\n", "if USE_FLASH_ATTENTION:\n", " attn_implementation=\"flash_attention_2\"\n", "\n", "model = AutoModelForCausalLM.from_pretrained(\n", " MODEL_NAME,\n", " quantization_config=quantization_config,\n", " attn_implementation=attn_implementation,\n", " torch_dtype=torch.bfloat16,\n", " device_map=\"auto\",\n", " )" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 176, "referenced_widgets": [ "531def06b1f7430983a2e4ba33f41f7f", "847b6b899bfc4e9b89b6ecb136a21385", "412da2e9912f4eb0ab89d44f0bb09cec", "1d56fddc294241f6a7cb4a300cb09afd", "6f83c639357f4729873f6897119532f0", "2551b382eca04537a3a11cd70aaf574c", "93e6cbabc77f4fd69ddc3dee9012cb8e", "da2997c847b84a32b43c377137f64b5e", "24f16c1efe8547f1ab36efcccda46b59", "cc8cb81531344463aa881093fff8c2f0", "f4c45b260e7a4feaaeef4c50c560641a" ] }, "id": "d9a23_jiC-qG", "outputId": "3cf0666d-f23d-4382-b17b-c29cbe91d2f6" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:89: UserWarning: \n", "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", "You will be able to reuse this secret in all of your notebooks.\n", "Please note that authentication is recommended but still optional to access public models or datasets.\n", " warnings.warn(\n" ] }, { "output_type": "display_data", "data": { "text/plain": [ "Loading checkpoint shards: 0%| | 0/4 [00:00<|USER_TOKEN|>{example['inputs'][i]}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>{example['targets'][i]}\"\n", " output_texts.append(text)\n", " return output_texts" ], "metadata": { "id": "CHXm3Io5zCrk" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "# Training Arguments\n", "training_arguments = TrainingArguments(\n", " output_dir=\"results\",\n", " num_train_epochs=20,\n", " per_device_train_batch_size=TRAIN_BATCH_SIZE,\n", " gradient_accumulation_steps=GRAD_ACC_STEPS,\n", " gradient_checkpointing=USE_GRAD_CHECKPOINTING,\n", " optim=\"paged_adamw_32bit\",\n", " save_steps=50,\n", " logging_steps=10,\n", " learning_rate=1e-3,\n", " weight_decay=0.001,\n", " fp16=False,\n", " bf16=True,\n", " warmup_ratio=0.05,\n", " group_by_length=True,\n", " lr_scheduler_type=\"constant\",\n", " report_to=\"none\"\n", ")\n", "\n", "peft_config = LoraConfig(\n", " lora_alpha=32,\n", " r=32,\n", " bias=\"none\",\n", " task_type=\"CAUSAL_LM\",\n", " target_modules=[\"q_proj\", \"v_proj\", \"k_proj\", \"o_proj\"]\n", ")\n", "\n", "trainer = SFTTrainer(\n", " model=model,\n", " train_dataset=dataset,\n", " peft_config=peft_config,\n", " max_seq_length=TRAIN_MAX_SEQ_LENGTH,\n", " tokenizer=tokenizer,\n", " args=training_arguments,\n", " formatting_func=formatting_prompts_func\n", ")" ], "metadata": { "id": "A9OdyDDEy7rM", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "49592f25-4aaf-4e21-f612-a6fe5c5865e1" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "/usr/local/lib/python3.10/dist-packages/trl/trainer/sft_trainer.py:318: UserWarning: You passed a tokenizer with `padding_side` not equal to `right` to the SFTTrainer. This might lead to some unexpected behaviour due to overflow issues when training a model in half-precision. You might consider adding `tokenizer.padding_side = 'right'` to your code.\n", " warnings.warn(\n" ] } ] }, { "cell_type": "code", "source": [ "trainer.train()" ], "metadata": { "id": "9BvK-3eYiwhx" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "# Save the model to disk\n", "trainer.model.save_pretrained(save_directory='aya-qlora')\n", "model.config.use_cache = True\n", "model.eval()" ], "metadata": { "id": "X3Lqfwo-8CCG" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "# Test Bengali inference on loaded fine-tuned model\n", "\n", "# Load Model and LoRA Adapter\n", "quantization_config = None\n", "if QUANTIZE_4BIT:\n", " quantization_config = BitsAndBytesConfig(\n", " load_in_4bit=True,\n", " bnb_4bit_quant_type=\"nf4\",\n", " bnb_4bit_use_double_quant=True,\n", " bnb_4bit_compute_dtype=torch.bfloat16,\n", " )\n", "\n", "attn_implementation = None\n", "if USE_FLASH_ATTENTION:\n", " attn_implementation=\"flash_attention_2\"\n", "\n", "loaded_model = AutoModelForCausalLM.from_pretrained(\n", " MODEL_NAME,\n", " quantization_config=quantization_config,\n", " attn_implementation=attn_implementation,\n", " torch_dtype=torch.bfloat16,\n", " device_map=\"auto\",\n", " )\n", "tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)\n", "loaded_model.load_adapter(\"aya-qlora\")\n", "\n", "\n", "prompts = [\n", " 'Translate from English to Bengali: \"Rates are competitive, almost always the best in the market\"'\n", "]\n", "\n", "generations = generate_aya_23(prompts, loaded_model)\n", "\n", "for p, g in zip(prompts, generations):\n", " print(\n", " \"PROMPT\", p ,\"RESPONSE\", g, \"\\n\", sep=\"\\n\"\n", " )" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 174, "referenced_widgets": [ "0272ba7f31a2441ab1cb5b8f77dbaacb", "d1bb171ddebd4f4bbeb4ed5d4b8b7076", "33b4fc55703746778511265e28160837", "7548c151f8764276ad7951e2ac80d981", "d972c72fef7c45998469550318661e71", "2811b7c68a7b4c95b91bd5690cf06577", "a33ccfdb735948e98a19d901d8091319", "c1103244cec74a299265729e630faffd", "340941cfc49e4ab983b73fb48c30dfe8", "8bb42aa84f4b4a9ab6417aed92132063", "b0cf428afc21468caeb664428627aaf6" ] }, "id": "w5HGIJtRJN-y", "outputId": "441193fe-89fa-40ad-8585-d1f2dcf124e5" }, "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "Loading checkpoint shards: 0%| | 0/4 [00:00