{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "5f93b7d1",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "===================================BUG REPORT===================================\n",
      "Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues\n",
      "For effortless bug reporting copy-paste your error into this form: https://docs.google.com/forms/d/e/1FAIpQLScPB8emS3Thkp66nvqwmjTEgxp8Y9ufuWTzFyr9kJ5AoI47dQ/viewform?usp=sf_link\n",
      "================================================================================\n",
      "CUDA SETUP: CUDA runtime path found: /home/sourab/miniconda3/envs/ml/lib/libcudart.so\n",
      "CUDA SETUP: Highest compute capability among GPUs detected: 7.5\n",
      "CUDA SETUP: Detected CUDA version 117\n",
      "CUDA SETUP: Loading binary /home/sourab/miniconda3/envs/ml/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda117.so...\n"
     ]
    }
   ],
   "source": [
    "# LoRA fine-tuning of bigscience/mt0-large for financial sentiment analysis (PEFT).\n",
    "import os\n",
    "\n",
    "# Set before any tokenizers usage so the Rust tokenizers library picks it up.\n",
    "os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"\n",
    "\n",
    "import torch\n",
    "from datasets import load_dataset\n",
    "from torch.utils.data import DataLoader\n",
    "from tqdm import tqdm\n",
    "from transformers import (\n",
    "    AutoModelForSeq2SeqLM,\n",
    "    AutoTokenizer,\n",
    "    default_data_collator,\n",
    "    get_linear_schedule_with_warmup,\n",
    ")\n",
    "from peft import get_peft_config, get_peft_model, get_peft_model_state_dict, LoraConfig, TaskType\n",
    "\n",
    "# --- Configuration ---\n",
    "device = \"cuda\"\n",
    "model_name_or_path = \"bigscience/mt0-large\"\n",
    "tokenizer_name_or_path = \"bigscience/mt0-large\"\n",
    "\n",
    "checkpoint_name = \"financial_sentiment_analysis_lora_v1.pt\"\n",
    "text_column = \"sentence\"\n",
    "label_column = \"text_label\"\n",
    "\n",
    "# Training hyperparameters\n",
    "max_length = 128\n",
    "lr = 1e-3\n",
    "num_epochs = 3\n",
    "batch_size = 8"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8d0850ac",
   "metadata": {},
   "outputs": [],
"source": [ "# creating model\n", "peft_config = LoraConfig(task_type=TaskType.SEQ_2_SEQ_LM, inference_mode=False, r=8, lora_alpha=32, lora_dropout=0.1)\n", "\n", "model = AutoModelForSeq2SeqLM.from_pretrained(model_name_or_path)\n", "model = get_peft_model(model, peft_config)\n", "model.print_trainable_parameters()\n", "model" ] }, { "cell_type": "code", "execution_count": 3, "id": "4ee2babf", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Found cached dataset financial_phrasebank (/home/sourab/.cache/huggingface/datasets/financial_phrasebank/sentences_allagree/1.0.0/550bde12e6c30e2674da973a55f57edde5181d53f5a5a34c1531c53f93b7e141)\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "3403bf3d718042018b0531848cc30209", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/1 [00:00