{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "5f93b7d1", "metadata": { "ExecuteTime": { "end_time": "2023-05-30T08:37:58.711225Z", "start_time": "2023-05-30T08:37:56.881307Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "===================================BUG REPORT===================================\n", "Welcome to bitsandbytes. For bug reports, please run\n", "\n", "python -m bitsandbytes\n", "\n", " and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues\n", "================================================================================\n", "bin /udir/tschilla/anaconda3/envs/peft/lib/python3.9/site-packages/bitsandbytes/libbitsandbytes_cuda117.so\n", "CUDA_SETUP: WARNING! libcudart.so not found in any environmental path. Searching in backup paths...\n", "CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so.11.0\n", "CUDA SETUP: Highest compute capability among GPUs detected: 8.0\n", "CUDA SETUP: Detected CUDA version 117\n", "CUDA SETUP: Loading binary /udir/tschilla/anaconda3/envs/peft/lib/python3.9/site-packages/bitsandbytes/libbitsandbytes_cuda117.so...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/udir/tschilla/anaconda3/envs/peft/lib/python3.9/site-packages/bitsandbytes/cuda_setup/main.py:149: UserWarning: /udir/tschilla/anaconda3 did not contain ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] as expected! 
Searching further paths...\n", " warn(msg)\n", "/udir/tschilla/anaconda3/envs/peft/lib/python3.9/site-packages/bitsandbytes/cuda_setup/main.py:149: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('Europe/Paris')}\n", " warn(msg)\n", "/udir/tschilla/anaconda3/envs/peft/lib/python3.9/site-packages/bitsandbytes/cuda_setup/main.py:149: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('/udir/tschilla/.cache/dotnet_bundle_extract')}\n", " warn(msg)\n", "/udir/tschilla/anaconda3/envs/peft/lib/python3.9/site-packages/bitsandbytes/cuda_setup/main.py:149: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('5002'), PosixPath('http'), PosixPath('//127.0.0.1')}\n", " warn(msg)\n", "/udir/tschilla/anaconda3/envs/peft/lib/python3.9/site-packages/bitsandbytes/cuda_setup/main.py:149: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('() { ( alias;\\n eval ${which_declare} ) | /usr/bin/which --tty-only --read-alias --read-functions --show-tilde --show-dot $@\\n}')}\n", " warn(msg)\n", "/udir/tschilla/anaconda3/envs/peft/lib/python3.9/site-packages/bitsandbytes/cuda_setup/main.py:149: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('module'), PosixPath('//matplotlib_inline.backend_inline')}\n", " warn(msg)\n", "/udir/tschilla/anaconda3/envs/peft/lib/python3.9/site-packages/bitsandbytes/cuda_setup/main.py:149: UserWarning: Found duplicate ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] files: {PosixPath('/usr/local/cuda/lib64/libcudart.so.11.0'), PosixPath('/usr/local/cuda/lib64/libcudart.so')}.. 
We'll flip a coin and try one of these, in order to fail forward.\n", "Either way, this might cause trouble in the future:\n", "If you get `CUDA error: invalid device function` errors, the above might be the cause and the solution is to make sure only one ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] in the paths that we search based on your env.\n", " warn(msg)\n" ] } ], "source": [ "import os\n", "\n", "import torch\n", "from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, default_data_collator, get_linear_schedule_with_warmup\n", "from peft import get_peft_model, PromptTuningConfig, TaskType, PromptTuningInit\n", "from torch.utils.data import DataLoader\n", "from tqdm import tqdm\n", "from datasets import load_dataset\n", "\n", "# Set TOKENIZERS_PARALLELISM=false in this process's environment.\n", "os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"\n", "\n", "# Prefer the GPU, but fall back to CPU when no CUDA device is available.\n", "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", "model_name_or_path = \"t5-large\"\n", "tokenizer_name_or_path = \"t5-large\"\n", "\n", "# Run settings; how each is consumed is not visible in this cell - check the cells that use them.\n", "checkpoint_name = \"financial_sentiment_analysis_prompt_tuning_v1.pt\"\n", "text_column = \"sentence\"\n", "label_column = \"text_label\"\n", "max_length = 128\n", "# NOTE(review): lr = 1 is unusually large; the next cell reports only 40960 trainable parameters - confirm it is intended.\n", "lr = 1\n", "num_epochs = 5\n", "batch_size = 8" ] }, { "cell_type": "code", "execution_count": 2, "id": "8d0850ac", "metadata": { "ExecuteTime": { "end_time": "2023-05-30T08:38:12.413984Z", "start_time": "2023-05-30T08:38:04.601042Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "trainable params: 40960 || all params: 737709056 || trainable%: 0.005552324411210698\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/udir/tschilla/anaconda3/envs/peft/lib/python3.9/site-packages/transformers/models/t5/tokenization_t5_fast.py:155: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\n", "For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n", "- Be aware that you SHOULD NOT rely on t5-large 
automatically truncating your input to 512 when padding/encoding.\n", "- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n", "- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n", " warnings.warn(\n" ] }, { "data": { "text/plain": [ "PeftModelForSeq2SeqLM(\n", " (base_model): T5ForConditionalGeneration(\n", " (shared): Embedding(32128, 1024)\n", " (encoder): T5Stack(\n", " (embed_tokens): Embedding(32128, 1024)\n", " (block): ModuleList(\n", " (0): T5Block(\n", " (layer): ModuleList(\n", " (0): T5LayerSelfAttention(\n", " (SelfAttention): T5Attention(\n", " (q): Linear(in_features=1024, out_features=1024, bias=False)\n", " (k): Linear(in_features=1024, out_features=1024, bias=False)\n", " (v): Linear(in_features=1024, out_features=1024, bias=False)\n", " (o): Linear(in_features=1024, out_features=1024, bias=False)\n", " (relative_attention_bias): Embedding(32, 16)\n", " )\n", " (layer_norm): T5LayerNorm()\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " (1): T5LayerFF(\n", " (DenseReluDense): T5DenseActDense(\n", " (wi): Linear(in_features=1024, out_features=4096, bias=False)\n", " (wo): Linear(in_features=4096, out_features=1024, bias=False)\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " (act): ReLU()\n", " )\n", " (layer_norm): T5LayerNorm()\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " )\n", " )\n", " (1-23): 23 x T5Block(\n", " (layer): ModuleList(\n", " (0): T5LayerSelfAttention(\n", " (SelfAttention): T5Attention(\n", " (q): Linear(in_features=1024, out_features=1024, bias=False)\n", " (k): Linear(in_features=1024, out_features=1024, bias=False)\n", " (v): Linear(in_features=1024, out_features=1024, bias=False)\n", " (o): Linear(in_features=1024, out_features=1024, bias=False)\n", " )\n", " (layer_norm): T5LayerNorm()\n", " (dropout): Dropout(p=0.1, 
inplace=False)\n", " )\n", " (1): T5LayerFF(\n", " (DenseReluDense): T5DenseActDense(\n", " (wi): Linear(in_features=1024, out_features=4096, bias=False)\n", " (wo): Linear(in_features=4096, out_features=1024, bias=False)\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " (act): ReLU()\n", " )\n", " (layer_norm): T5LayerNorm()\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " )\n", " )\n", " )\n", " (final_layer_norm): T5LayerNorm()\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " (decoder): T5Stack(\n", " (embed_tokens): Embedding(32128, 1024)\n", " (block): ModuleList(\n", " (0): T5Block(\n", " (layer): ModuleList(\n", " (0): T5LayerSelfAttention(\n", " (SelfAttention): T5Attention(\n", " (q): Linear(in_features=1024, out_features=1024, bias=False)\n", " (k): Linear(in_features=1024, out_features=1024, bias=False)\n", " (v): Linear(in_features=1024, out_features=1024, bias=False)\n", " (o): Linear(in_features=1024, out_features=1024, bias=False)\n", " (relative_attention_bias): Embedding(32, 16)\n", " )\n", " (layer_norm): T5LayerNorm()\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " (1): T5LayerCrossAttention(\n", " (EncDecAttention): T5Attention(\n", " (q): Linear(in_features=1024, out_features=1024, bias=False)\n", " (k): Linear(in_features=1024, out_features=1024, bias=False)\n", " (v): Linear(in_features=1024, out_features=1024, bias=False)\n", " (o): Linear(in_features=1024, out_features=1024, bias=False)\n", " )\n", " (layer_norm): T5LayerNorm()\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " (2): T5LayerFF(\n", " (DenseReluDense): T5DenseActDense(\n", " (wi): Linear(in_features=1024, out_features=4096, bias=False)\n", " (wo): Linear(in_features=4096, out_features=1024, bias=False)\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " (act): ReLU()\n", " )\n", " (layer_norm): T5LayerNorm()\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " )\n", " )\n", " (1-23): 23 x T5Block(\n", " (layer): 
ModuleList(\n", " (0): T5LayerSelfAttention(\n", " (SelfAttention): T5Attention(\n", " (q): Linear(in_features=1024, out_features=1024, bias=False)\n", " (k): Linear(in_features=1024, out_features=1024, bias=False)\n", " (v): Linear(in_features=1024, out_features=1024, bias=False)\n", " (o): Linear(in_features=1024, out_features=1024, bias=False)\n", " )\n", " (layer_norm): T5LayerNorm()\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " (1): T5LayerCrossAttention(\n", " (EncDecAttention): T5Attention(\n", " (q): Linear(in_features=1024, out_features=1024, bias=False)\n", " (k): Linear(in_features=1024, out_features=1024, bias=False)\n", " (v): Linear(in_features=1024, out_features=1024, bias=False)\n", " (o): Linear(in_features=1024, out_features=1024, bias=False)\n", " )\n", " (layer_norm): T5LayerNorm()\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " (2): T5LayerFF(\n", " (DenseReluDense): T5DenseActDense(\n", " (wi): Linear(in_features=1024, out_features=4096, bias=False)\n", " (wo): Linear(in_features=4096, out_features=1024, bias=False)\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " (act): ReLU()\n", " )\n", " (layer_norm): T5LayerNorm()\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " )\n", " )\n", " )\n", " (final_layer_norm): T5LayerNorm()\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " (lm_head): Linear(in_features=1024, out_features=32128, bias=False)\n", " )\n", " (prompt_encoder): ModuleDict(\n", " (default): PromptEmbedding(\n", " (embedding): Embedding(40, 1024)\n", " )\n", " )\n", " (word_embeddings): Embedding(32128, 1024)\n", ")" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Build the prompt-tuning config, load the base seq2seq model and wrap it with PEFT.\n", "peft_config = PromptTuningConfig(\n", " task_type=TaskType.SEQ_2_SEQ_LM,\n", " prompt_tuning_init=PromptTuningInit.TEXT,\n", " num_virtual_tokens=20,\n", " prompt_tuning_init_text=\"What is the sentiment of this article?\\n\",\n", " 
inference_mode=False,\n", " # Use the dedicated tokenizer setting from the first cell instead of the model name.\n", " tokenizer_name_or_path=tokenizer_name_or_path,\n", ")\n", "\n", "model = AutoModelForSeq2SeqLM.from_pretrained(model_name_or_path)\n", "model = get_peft_model(model, peft_config)\n", "model.print_trainable_parameters()\n", "# Last expression: display the wrapped module tree as the cell result.\n", "model" ] }, { "cell_type": "code", "execution_count": 3, "id": "4ee2babf", "metadata": { "ExecuteTime": { "end_time": "2023-05-30T08:38:18.759143Z", "start_time": "2023-05-30T08:38:17.881621Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Found cached dataset financial_phrasebank (/data/proxem/huggingface/datasets/financial_phrasebank/sentences_allagree/1.0.0/550bde12e6c30e2674da973a55f57edde5181d53f5a5a34c1531c53f93b7e141)\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "fb63f50cb7cb4f5aae10648ba74d6c4e", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/1 [00:00