Penghaoo commited on Jul 25, 2024

Commit

4d3e798

verified ·

1 Parent(s): 902ae55

End of training

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +23 -0
.ipynb_checkpoints/Untitled-checkpoint.ipynb +403 -0
.ipynb_checkpoints/Untitled1-checkpoint.ipynb +6 -0
.ipynb_checkpoints/dataloader-checkpoint.py +295 -0
.ipynb_checkpoints/reward_modeling-checkpoint.py +158 -0
.ipynb_checkpoints/rm-checkpoint.ipynb +565 -0
README.md +59 -0
RMmodels/gemma-2-9b_sftm3genre36007200/README.md +202 -0
RMmodels/gemma-2-9b_sftm3genre36007200/adapter_config.json +31 -0
RMmodels/gemma-2-9b_sftm3genre36007200/adapter_model.safetensors +3 -0
RMmodels/gemma-2-9b_sftm3genre36007200/special_tokens_map.json +34 -0
RMmodels/gemma-2-9b_sftm3genre36007200/tokenizer.json +3 -0
RMmodels/gemma-2-9b_sftm3genre36007200/tokenizer_config.json +1760 -0
RMmodels/gemma-2-9b_sftm3genre36007200/training_args.bin +3 -0
SFTmodels/gemma-2-9b_sftm2genre100714/README.md +202 -0
SFTmodels/gemma-2-9b_sftm2genre100714/adapter_config.json +31 -0
SFTmodels/gemma-2-9b_sftm2genre100714/adapter_model.safetensors +3 -0
SFTmodels/gemma-2-9b_sftm2genre100714/special_tokens_map.json +28 -0
SFTmodels/gemma-2-9b_sftm2genre100714/tokenizer.json +3 -0
SFTmodels/gemma-2-9b_sftm2genre100714/tokenizer_config.json +1756 -0
SFTmodels/gemma-2-9b_sftm2genre100714/training_args.bin +3 -0
SFTmodels/gemma-2-9b_sftm3genre1800/README.md +202 -0
SFTmodels/gemma-2-9b_sftm3genre1800/adapter_config.json +31 -0
SFTmodels/gemma-2-9b_sftm3genre1800/adapter_model.safetensors +3 -0
SFTmodels/gemma-2-9b_sftm3genre1800/special_tokens_map.json +28 -0
SFTmodels/gemma-2-9b_sftm3genre1800/tokenizer.json +3 -0
SFTmodels/gemma-2-9b_sftm3genre1800/tokenizer_config.json +1756 -0
SFTmodels/gemma-2-9b_sftm3genre1800/training_args.bin +3 -0
SFTmodels/gemma-2-9b_sftm3genre3600/README.md +202 -0
SFTmodels/gemma-2-9b_sftm3genre3600/adapter_config.json +31 -0
SFTmodels/gemma-2-9b_sftm3genre3600/adapter_model.safetensors +3 -0
SFTmodels/gemma-2-9b_sftm3genre3600/special_tokens_map.json +28 -0
SFTmodels/gemma-2-9b_sftm3genre3600/tokenizer.json +3 -0
SFTmodels/gemma-2-9b_sftm3genre3600/tokenizer_config.json +1756 -0
SFTmodels/gemma-2-9b_sftm3genre3600/training_args.bin +3 -0
SFTmodels/gemma-2-9b_sftm3genre7200/README.md +202 -0
SFTmodels/gemma-2-9b_sftm3genre7200/adapter_config.json +31 -0
SFTmodels/gemma-2-9b_sftm3genre7200/adapter_model.safetensors +3 -0
SFTmodels/gemma-2-9b_sftm3genre7200/special_tokens_map.json +28 -0
SFTmodels/gemma-2-9b_sftm3genre7200/tokenizer.json +3 -0
SFTmodels/gemma-2-9b_sftm3genre7200/tokenizer_config.json +1756 -0
SFTmodels/gemma-2-9b_sftm3genre7200/training_args.bin +3 -0
Untitled.ipynb +744 -0
Untitled1.ipynb +1519 -0
adapter_config.json +31 -0
adapter_model.safetensors +3 -0
dataloader.py +296 -0
model/SFTmodels/gemma-2b_sftm3genre10vast/README.md +202 -0
model/SFTmodels/gemma-2b_sftm3genre10vast/adapter_config.json +34 -0
model/SFTmodels/gemma-2b_sftm3genre10vast/adapter_model.safetensors +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,26 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+RMmodels/gemma-2-9b_sftm3genre36007200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+SFTmodels/gemma-2-9b_sftm2genre100714/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+SFTmodels/gemma-2-9b_sftm3genre1800/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+SFTmodels/gemma-2-9b_sftm3genre3600/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+SFTmodels/gemma-2-9b_sftm3genre7200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+model/gemma-2b_sftm3genre10/checkpoint-1000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+model/gemma-2b_sftm3genre10/checkpoint-1500/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+model/gemma-2b_sftm3genre10/checkpoint-2000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+model/gemma-2b_sftm3genre10/checkpoint-2500/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+model/gemma-2b_sftm3genre10/checkpoint-3000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+model/gemma-2b_sftm3genre10/checkpoint-500/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+model/gemma-2b_sftm3genre10/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+readsy/0117_2story_pairs.csv filter=lfs diff=lfs merge=lfs -text
+readsy/pairs/readsy_story_pairs0407.csv filter=lfs diff=lfs merge=lfs -text
+readsy/readsy_story_pairs0407.csv filter=lfs diff=lfs merge=lfs -text
+readsy/stories/prompt_1017.csv filter=lfs diff=lfs merge=lfs -text
+readsy/stories/prompt_1064.csv filter=lfs diff=lfs merge=lfs -text
+readsy/stories/prompt_1073.csv filter=lfs diff=lfs merge=lfs -text
+readsy/stories/prompt_1104.csv filter=lfs diff=lfs merge=lfs -text
+readsy/stories/prompt_1109.csv filter=lfs diff=lfs merge=lfs -text
+readsy/stories/prompt_1174.csv filter=lfs diff=lfs merge=lfs -text
+readsy/stories/prompt_1229.csv filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

.ipynb_checkpoints/Untitled-checkpoint.ipynb ADDED Viewed

	@@ -0,0 +1,403 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "aa178322-0de1-46e3-bdaa-935d448cafda",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n"
+     ]
+    }
+   ],
+   "source": [
+    "#SFT \n",
+    "from unsloth import FastLanguageModel\n",
+    "import torch\n",
+    "max_seq_length = 2048*4 # Choose any! We auto support RoPE Scaling internally!\n",
+    "dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+\n",
+    "load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.\n",
+    "datapath = 'readsy/stories/'\n",
+    "pairpath = 'readsy/pairs/readsy_story_pairs0407.csv'\n",
+    "mode='m3'\n",
+    "split_by = 'genre'\n",
+    "model_name = 'model/gemma/gemma-2b/'\n",
+    "lease_likes = 10\n",
+    "suffix = 'vast'\n",
+    "save_path = 'model/SFTmodels/' +model_name.split('/')[-1] + '_sft' + mode + split_by + str(lease_likes) + suffix\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "280c81eb-4879-41d9-aea4-1dffc2edf836",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "==((====))==  Unsloth: Fast Gemma patching release 2024.5\n",
+      "   \\\\   /|    GPU: NVIDIA GeForce RTX 4090. Max memory: 23.643 GB. Platform = Linux.\n",
+      "O^O/ \\_/ \\    Pytorch: 2.2.0+cu121. CUDA = 8.9. CUDA Toolkit = 12.1.\n",
+      "\\        /    Bfloat16 = TRUE. Xformers = 0.0.24. FA = True.\n",
+      " \"-____-\"     Free Apache license: http://github.com/unslothai/unsloth\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "`config.hidden_act` is ignored, you should use `config.hidden_activation` instead.\n",
+      "Gemma's activation function will be set to `gelu_pytorch_tanh`. Please, use\n",
+      "`config.hidden_activation` if you want to override this behaviour.\n",
+      "See https://github.com/huggingface/transformers/pull/29402 for more details.\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "a50c6bf9f7224c698cf118c51cd379bf",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Unsloth 2024.5 patched 18 layers with 18 QKV layers, 18 O layers and 18 MLP layers.\n"
+     ]
+    }
+   ],
+   "source": [
+    "model, tokenizer = FastLanguageModel.from_pretrained(\n",
+    "    model_name = model_name, # Choose ANY! eg teknium/OpenHermes-2.5-Mistral-7B\n",
+    "    max_seq_length = max_seq_length,\n",
+    "    dtype = dtype,\n",
+    "    load_in_4bit = load_in_4bit,\n",
+    "    # token = \"hf_...\", # use one if using gated models like meta-llama/Llama-2-7b-hf\n",
+    ")\n",
+    "model = FastLanguageModel.get_peft_model(\n",
+    "    model,\n",
+    "    use_gradient_checkpointing = \"unsloth\",\n",
+    "    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128\n",
+    "    target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n",
+    "                      \"gate_proj\", \"up_proj\", \"down_proj\",],\n",
+    "    lora_alpha = 16,\n",
+    "    lora_dropout = 0, # Supports any, but = 0 is optimized\n",
+    "    bias = \"none\",    # Supports any, but = \"none\" is optimized\n",
+    "    random_state = 3407,\n",
+    "    use_rslora = False,  # We support rank stabilized LoRA\n",
+    "    loftq_config = None, # And LoftQ\n",
+    ")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "5989150b-1ad0-4168-8a28-d0379045ddd7",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "the total number of pairs is  100\n",
+      "the number of effective pairs is  90\n",
+      "Index(['prompt_id', 'prompt', 'story_id', 'story_title', 'story_author',\n",
+      "       'story_url', 'link', 'genre', 'is_sensitive', 'categories', 'likes',\n",
+      "       'story_text', 'posted_date', 'comments'],\n",
+      "      dtype='object')\n",
+      "{'Horror': 10, 'Middle School': 7, 'Dialogue': 6, 'Angst': 5, 'Kids': 5, 'Thriller and Suspense': 5, 'Novel': 5, 'Science Fiction': 5, 'Romance': 5, 'Adventure': 4, 'Narrative': 3, 'Winter': 3, 'Fluff': 3, 'Mystery': 3, 'Character': 3, 'Teens': 2, 'Dramatic': 2, 'Funny': 2, 'Sad': 2, 'Adults': 2, 'High School': 2, \"Valentine's Day\": 1, 'Short Story': 1, 'Summer': 1, 'Holiday': 1, 'Christmas': 1, 'Fiction': 1}\n",
+      "the genre of test set is  ['Horror']\n",
+      "the percentage of test set is  0.1111111111111111 where total is  90\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "No chat template is set for this tokenizer, falling back to a default class-level template. This is very error-prone, because models are often trained with templates different from the class default! Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which point any code depending on them will stop working. We recommend setting a valid chat template before then to ensure that this model continues working without issues.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "the columns of train is  Index(['story1_id', 'story2_id', 'prompt_id'], dtype='object')\n",
+      "the first example of train is  story1_id                                               7gz4qo\n",
+      "story2_id                                               7gz4qo\n",
+      "prompt_id                                          prompt_0156\n",
+      "text         <bos><|im_start|>user\\nWrite a story where a c...\n",
+      "Name: 0, dtype: object\n"
+     ]
+    }
+   ],
+   "source": [
+    "from dataloader import StoryPairDataset\n",
+    "SPdataloader = StoryPairDataset(datapath,\n",
+    "                              pairpath,\n",
+    "                              tokenizer,\n",
+    "                              task='sft',\n",
+    "                              used_dataset_size=100,\n",
+    "                              train_test_split=0.1,\n",
+    "                              split_by=split_by,\n",
+    "                              max_len=4096,\n",
+    "                              mode= mode,\n",
+    "                              max_time_window=3600,\n",
+    "                              least_likes= lease_likes,\n",
+    "                              margin=False)\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ec67afee-86b1-4c91-b3ad-013db3e36bf5",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "3d804ea0-5619-49a8-87b7-1e6149589865",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n",
+      "==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1\n",
+      "   \\\\   /|    Num examples = 48 | Num Epochs = 1\n",
+      "O^O/ \\_/ \\    Batch size per device = 2 | Gradient Accumulation steps = 2\n",
+      "\\        /    Total batch size = 4 | Total steps = 12\n",
+      " \"-____-\"     Number of trainable parameters = 19,611,648\n"
+     ]
+    },
+    {
+     "ename": "OutOfMemoryError",
+     "evalue": "CUDA out of memory. Tried to allocate 15.62 GiB. GPU 0 has a total capacity of 23.64 GiB of which 10.97 GiB is free. Process 1232897 has 12.67 GiB memory in use. Of the allocated memory 12.13 GiB is allocated by PyTorch, and 82.25 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mOutOfMemoryError\u001b[0m                          Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[15], line 30\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m TrainingArguments\n\u001b[1;32m      5\u001b[0m trainer \u001b[38;5;241m=\u001b[39m SFTTrainer(\n\u001b[1;32m      6\u001b[0m     model \u001b[38;5;241m=\u001b[39m model,\n\u001b[1;32m      7\u001b[0m     tokenizer \u001b[38;5;241m=\u001b[39m tokenizer,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m     28\u001b[0m     ),\n\u001b[1;32m     29\u001b[0m )\n\u001b[0;32m---> 30\u001b[0m \u001b[43mtrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     31\u001b[0m \u001b[38;5;66;03m#save the model AND the tokenizer\u001b[39;00m\n\u001b[1;32m     32\u001b[0m trainer\u001b[38;5;241m.\u001b[39msave_model(save_path)\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/trl/trainer/sft_trainer.py:361\u001b[0m, in \u001b[0;36mSFTTrainer.train\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m    358\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mneftune_noise_alpha \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_trainer_supports_neftune:\n\u001b[1;32m    359\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_trl_activate_neftune(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel)\n\u001b[0;32m--> 361\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    363\u001b[0m \u001b[38;5;66;03m# After training we make sure to retrieve back the original forward pass method\u001b[39;00m\n\u001b[1;32m    364\u001b[0m \u001b[38;5;66;03m# for the embedding layer by removing the forward post hook.\u001b[39;00m\n\u001b[1;32m    365\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mneftune_noise_alpha \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_trainer_supports_neftune:\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/transformers/trainer.py:1885\u001b[0m, in \u001b[0;36mTrainer.train\u001b[0;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m   1883\u001b[0m         hf_hub_utils\u001b[38;5;241m.\u001b[39menable_progress_bars()\n\u001b[1;32m   1884\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1885\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43minner_training_loop\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1886\u001b[0m \u001b[43m        \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1887\u001b[0m \u001b[43m        \u001b[49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1888\u001b[0m \u001b[43m        \u001b[49m\u001b[43mtrial\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtrial\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1889\u001b[0m \u001b[43m        \u001b[49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1890\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m<string>:352\u001b[0m, in \u001b[0;36m_fast_inner_training_loop\u001b[0;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/transformers/trainer.py:3238\u001b[0m, in \u001b[0;36mTrainer.training_step\u001b[0;34m(self, model, inputs)\u001b[0m\n\u001b[1;32m   3235\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m loss_mb\u001b[38;5;241m.\u001b[39mreduce_mean()\u001b[38;5;241m.\u001b[39mdetach()\u001b[38;5;241m.\u001b[39mto(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mdevice)\n\u001b[1;32m   3237\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcompute_loss_context_manager():\n\u001b[0;32m-> 3238\u001b[0m     loss \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcompute_loss\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   3240\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m inputs\n\u001b[1;32m   3241\u001b[0m torch\u001b[38;5;241m.\u001b[39mcuda\u001b[38;5;241m.\u001b[39mempty_cache()\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/transformers/trainer.py:3264\u001b[0m, in \u001b[0;36mTrainer.compute_loss\u001b[0;34m(self, model, inputs, return_outputs)\u001b[0m\n\u001b[1;32m   3262\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m   3263\u001b[0m     labels \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 3264\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[43mmodel\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   3265\u001b[0m \u001b[38;5;66;03m# Save past state if it exists\u001b[39;00m\n\u001b[1;32m   3266\u001b[0m \u001b[38;5;66;03m# TODO: this needs to be fixed and made cleaner later.\u001b[39;00m\n\u001b[1;32m   3267\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mpast_index \u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m:\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1511\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1509\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)  \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m   1510\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1511\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1520\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1515\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m   1516\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m   1517\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m   1518\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m   1519\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1520\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1522\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m   1523\u001b[0m     result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/accelerate/utils/operations.py:822\u001b[0m, in \u001b[0;36mconvert_outputs_to_fp32.<locals>.forward\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    821\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m--> 822\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmodel_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/accelerate/utils/operations.py:810\u001b[0m, in \u001b[0;36mConvertOutputsToFp32.__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m    809\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m--> 810\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m convert_to_fp32(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m)\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/amp/autocast_mode.py:16\u001b[0m, in \u001b[0;36mautocast_decorator.<locals>.decorate_autocast\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m     13\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m     14\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdecorate_autocast\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m     15\u001b[0m     \u001b[38;5;28;01mwith\u001b[39;00m autocast_instance:\n\u001b[0;32m---> 16\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/accelerate/utils/operations.py:822\u001b[0m, in \u001b[0;36mconvert_outputs_to_fp32.<locals>.forward\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    821\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m--> 822\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmodel_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/accelerate/utils/operations.py:810\u001b[0m, in \u001b[0;36mConvertOutputsToFp32.__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m    809\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m--> 810\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m convert_to_fp32(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m)\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/amp/autocast_mode.py:16\u001b[0m, in \u001b[0;36mautocast_decorator.<locals>.decorate_autocast\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m     13\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m     14\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdecorate_autocast\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m     15\u001b[0m     \u001b[38;5;28;01mwith\u001b[39;00m autocast_instance:\n\u001b[0;32m---> 16\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/accelerate/utils/operations.py:822\u001b[0m, in \u001b[0;36mconvert_outputs_to_fp32.<locals>.forward\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    821\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m--> 822\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmodel_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/accelerate/utils/operations.py:810\u001b[0m, in \u001b[0;36mConvertOutputsToFp32.__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m    809\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m--> 810\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mconvert_to_fp32\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/accelerate/utils/operations.py:789\u001b[0m, in \u001b[0;36mconvert_to_fp32\u001b[0;34m(tensor)\u001b[0m\n\u001b[1;32m    783\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_is_fp16_bf16_tensor\u001b[39m(tensor):\n\u001b[1;32m    784\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m (is_torch_tensor(tensor) \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(tensor, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdtype\u001b[39m\u001b[38;5;124m\"\u001b[39m)) \u001b[38;5;129;01mand\u001b[39;00m tensor\u001b[38;5;241m.\u001b[39mdtype \u001b[38;5;129;01min\u001b[39;00m (\n\u001b[1;32m    785\u001b[0m         torch\u001b[38;5;241m.\u001b[39mfloat16,\n\u001b[1;32m    786\u001b[0m         torch\u001b[38;5;241m.\u001b[39mbfloat16,\n\u001b[1;32m    787\u001b[0m     )\n\u001b[0;32m--> 789\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mrecursively_apply\u001b[49m\u001b[43m(\u001b[49m\u001b[43m_convert_to_fp32\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtensor\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtest_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_is_fp16_bf16_tensor\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/accelerate/utils/operations.py:118\u001b[0m, in \u001b[0;36mrecursively_apply\u001b[0;34m(func, data, test_type, error_on_other_type, *args, **kwargs)\u001b[0m\n\u001b[1;32m    107\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m honor_type(\n\u001b[1;32m    108\u001b[0m         data,\n\u001b[1;32m    109\u001b[0m         (\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    114\u001b[0m         ),\n\u001b[1;32m    115\u001b[0m     )\n\u001b[1;32m    116\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(data, Mapping):\n\u001b[1;32m    117\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(data)(\n\u001b[0;32m--> 118\u001b[0m         {\n\u001b[1;32m    119\u001b[0m             k: recursively_apply(\n\u001b[1;32m    120\u001b[0m                 func, v, \u001b[38;5;241m*\u001b[39margs, test_type\u001b[38;5;241m=\u001b[39mtest_type, error_on_other_type\u001b[38;5;241m=\u001b[39merror_on_other_type, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs\n\u001b[1;32m    121\u001b[0m             )\n\u001b[1;32m    122\u001b[0m             \u001b[38;5;28;01mfor\u001b[39;00m k, v \u001b[38;5;129;01min\u001b[39;00m data\u001b[38;5;241m.\u001b[39mitems()\n\u001b[1;32m    123\u001b[0m         }\n\u001b[1;32m    124\u001b[0m     )\n\u001b[1;32m    125\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m test_type(data):\n\u001b[1;32m    126\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m func(data, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/accelerate/utils/operations.py:119\u001b[0m, in \u001b[0;36m<dictcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m    107\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m honor_type(\n\u001b[1;32m    108\u001b[0m         data,\n\u001b[1;32m    109\u001b[0m         (\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    114\u001b[0m         ),\n\u001b[1;32m    115\u001b[0m     )\n\u001b[1;32m    116\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(data, Mapping):\n\u001b[1;32m    117\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(data)(\n\u001b[1;32m    118\u001b[0m         {\n\u001b[0;32m--> 119\u001b[0m             k: \u001b[43mrecursively_apply\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    120\u001b[0m \u001b[43m                \u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mv\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtest_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtest_type\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merror_on_other_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merror_on_other_type\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\n\u001b[1;32m    121\u001b[0m \u001b[43m            \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    122\u001b[0m             \u001b[38;5;28;01mfor\u001b[39;00m k, v \u001b[38;5;129;01min\u001b[39;00m data\u001b[38;5;241m.\u001b[39mitems()\n\u001b[1;32m    123\u001b[0m         }\n\u001b[1;32m    124\u001b[0m     )\n\u001b[1;32m    125\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m test_type(data):\n\u001b[1;32m    126\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m func(data, \u001b[38;5;241m*\u001b[39margs, 
\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/accelerate/utils/operations.py:126\u001b[0m, in \u001b[0;36mrecursively_apply\u001b[0;34m(func, data, test_type, error_on_other_type, *args, **kwargs)\u001b[0m\n\u001b[1;32m    117\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(data)(\n\u001b[1;32m    118\u001b[0m         {\n\u001b[1;32m    119\u001b[0m             k: recursively_apply(\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    123\u001b[0m         }\n\u001b[1;32m    124\u001b[0m     )\n\u001b[1;32m    125\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m test_type(data):\n\u001b[0;32m--> 126\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    127\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m error_on_other_type:\n\u001b[1;32m    128\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\n\u001b[1;32m    129\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUnsupported types (\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mtype\u001b[39m(data)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m) passed to `\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfunc\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m`. 
Only nested list/tuple/dicts of \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    130\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mobjects that are valid for `\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mtest_type\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m` should be passed.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    131\u001b[0m     )\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/accelerate/utils/operations.py:781\u001b[0m, in \u001b[0;36mconvert_to_fp32.<locals>._convert_to_fp32\u001b[0;34m(tensor)\u001b[0m\n\u001b[1;32m    780\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_convert_to_fp32\u001b[39m(tensor):\n\u001b[0;32m--> 781\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mtensor\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfloat\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
+      "\u001b[0;31mOutOfMemoryError\u001b[0m: CUDA out of memory. Tried to allocate 15.62 GiB. GPU 0 has a total capacity of 23.64 GiB of which 10.97 GiB is free. Process 1232897 has 12.67 GiB memory in use. Of the allocated memory 12.13 GiB is allocated by PyTorch, and 82.25 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)"
+     ]
+    }
+   ],
+   "source": [
+    "save_path = 'model/SFTmodels/' +model_name.split('/')[-2] + '_sft' + mode + split_by + str(lease_likes) + suffix\n",
+    "from trl import SFTTrainer\n",
+    "from transformers import TrainingArguments\n",
+    "\n",
+    "trainer = SFTTrainer(\n",
+    "    model = model,\n",
+    "    tokenizer = tokenizer,\n",
+    "    train_dataset = SPdataloader.dataset[\"train\"],\n",
+    "    eval_dataset = SPdataloader.dataset[\"test\"],\n",
+    "    dataset_text_field = \"text\",\n",
+    "    max_seq_length = max_seq_length,\n",
+    "    dataset_num_proc = 1,\n",
+    "    packing = True, # Can make training 5x faster for short sequences.\n",
+    "    args = TrainingArguments(\n",
+    "        per_device_train_batch_size = 1,\n",
+    "        gradient_accumulation_steps = 2,\n",
+    "        warmup_steps = 5,\n",
+    "        num_train_epochs = 1,\n",
+    "        learning_rate = 1e-4,\n",
+    "        fp16 = not torch.cuda.is_bf16_supported(),\n",
+    "        bf16 = torch.cuda.is_bf16_supported(),\n",
+    "        logging_steps = 1,\n",
+    "        optim = \"adamw_8bit\",\n",
+    "        weight_decay = 0.01,\n",
+    "        lr_scheduler_type = \"cosine\",\n",
+    "        seed = 3407,\n",
+    "        output_dir = save_path,\n",
+    "    ),\n",
+    ")\n",
+    "trainer.train()\n",
+    "#save the model AND the tokenizer\n",
+    "trainer.save_model(save_path)\n",
+    "#trainer.save_tokenizer(save_path)\n",
+    "print('model saved at', save_path)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "357116f9-e206-4a77-acf6-43835d2b83bf",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Prompt: Write a story about discovering a lost manuscript. It can be from a famous (or infamous) author, or an unknown one.\n",
+      "inputs: <bos><|im_start|>user\n",
+      "Write a story about discovering a lost manuscript. It can be from a famous (or infamous) author, or an unknown one.<|im_end|>\n",
+      "<|im_start|>assistant\n",
+      "\n",
+      "inputs encoded: tensor([[     2,      2, 235322, 235371,    571, 235298,   2997,  73786,   1645,\n",
+      "            108,   5559,    476,   3904,   1105,  59551,    476,   5501,  28086,\n",
+      "         235265,   1165,    798,    614,    774,    476,  10964,    591,    483,\n",
+      "          76100, 235275,   3426, 235269,    689,    671,  12417,    974,  35606,\n",
+      "         235371,    571, 235298,    615,  73786,    108, 235322, 235371,    571,\n",
+      "         235298,   2997,  73786, 105776,    108]])\n"
+     ]
+    },
+    {
+     "ename": "KeyboardInterrupt",
+     "evalue": "",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[10], line 23\u001b[0m\n\u001b[1;32m     21\u001b[0m prompt \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mWrite a story about discovering a lost manuscript. It can be from a famous (or infamous) author, or an unknown one.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m     22\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPrompt:\u001b[39m\u001b[38;5;124m\"\u001b[39m, prompt)\n\u001b[0;32m---> 23\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[43mgenerate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtokenizer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mprompt\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     24\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mwritten by the model:\u001b[39m\u001b[38;5;124m'\u001b[39m, model_path)  \n\u001b[1;32m     25\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mGenerated story:\u001b[39m\u001b[38;5;124m\"\u001b[39m, outputs)\n",
+      "Cell \u001b[0;32mIn[10], line 14\u001b[0m, in \u001b[0;36mgenerate\u001b[0;34m(model, tokenizer, prompt, max_length)\u001b[0m\n\u001b[1;32m     11\u001b[0m \u001b[38;5;66;03m# Move inputs to GPU\u001b[39;00m\n\u001b[1;32m     12\u001b[0m inputs \u001b[38;5;241m=\u001b[39m inputs\u001b[38;5;241m.\u001b[39mto(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcuda\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m---> 14\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate\u001b[49m\u001b[43m(\u001b[49m\u001b[43minput_ids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmax_new_tokens\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mmax_length\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmin_new_tokens\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m500\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m     15\u001b[0m \u001b[38;5;66;03m#decode the outputs\u001b[39;00m\n\u001b[1;32m     16\u001b[0m outputs \u001b[38;5;241m=\u001b[39m tokenizer\u001b[38;5;241m.\u001b[39mdecode(outputs[\u001b[38;5;241m0\u001b[39m], skip_special_tokens\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/peft/peft_model.py:1491\u001b[0m, in \u001b[0;36mPeftModelForCausalLM.generate\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1489\u001b[0m     \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_enable_peft_forward_hooks(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m   1490\u001b[0m         kwargs \u001b[38;5;241m=\u001b[39m {k: v \u001b[38;5;28;01mfor\u001b[39;00m k, v \u001b[38;5;129;01min\u001b[39;00m kwargs\u001b[38;5;241m.\u001b[39mitems() \u001b[38;5;28;01mif\u001b[39;00m k \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mspecial_peft_forward_args}\n\u001b[0;32m-> 1491\u001b[0m         outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbase_model\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1492\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m   1493\u001b[0m     outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbase_model\u001b[38;5;241m.\u001b[39mgenerate(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/utils/_contextlib.py:115\u001b[0m, in \u001b[0;36mcontext_decorator.<locals>.decorate_context\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    112\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m    113\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdecorate_context\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m    114\u001b[0m     \u001b[38;5;28;01mwith\u001b[39;00m ctx_factory():\n\u001b[0;32m--> 115\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/transformers/generation/utils.py:1758\u001b[0m, in \u001b[0;36mGenerationMixin.generate\u001b[0;34m(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, negative_prompt_ids, negative_prompt_attention_mask, **kwargs)\u001b[0m\n\u001b[1;32m   1750\u001b[0m     input_ids, model_kwargs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_expand_inputs_for_generation(\n\u001b[1;32m   1751\u001b[0m         input_ids\u001b[38;5;241m=\u001b[39minput_ids,\n\u001b[1;32m   1752\u001b[0m         expand_size\u001b[38;5;241m=\u001b[39mgeneration_config\u001b[38;5;241m.\u001b[39mnum_return_sequences,\n\u001b[1;32m   1753\u001b[0m         is_encoder_decoder\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconfig\u001b[38;5;241m.\u001b[39mis_encoder_decoder,\n\u001b[1;32m   1754\u001b[0m         \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mmodel_kwargs,\n\u001b[1;32m   1755\u001b[0m     )\n\u001b[1;32m   1757\u001b[0m     \u001b[38;5;66;03m# 13. 
run sample (it degenerates to greedy search when `generation_config.do_sample=False`)\u001b[39;00m\n\u001b[0;32m-> 1758\u001b[0m     result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sample\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1759\u001b[0m \u001b[43m        \u001b[49m\u001b[43minput_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1760\u001b[0m \u001b[43m        \u001b[49m\u001b[43mlogits_processor\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mprepared_logits_processor\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1761\u001b[0m \u001b[43m        \u001b[49m\u001b[43mlogits_warper\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mprepared_logits_warper\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1762\u001b[0m \u001b[43m        \u001b[49m\u001b[43mstopping_criteria\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mprepared_stopping_criteria\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1763\u001b[0m \u001b[43m        \u001b[49m\u001b[43mgeneration_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgeneration_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1764\u001b[0m \u001b[43m        \u001b[49m\u001b[43msynced_gpus\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msynced_gpus\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1765\u001b[0m \u001b[43m        \u001b[49m\u001b[43mstreamer\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstreamer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1766\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mmodel_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1767\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1769\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m generation_mode \u001b[38;5;129;01min\u001b[39;00m (GenerationMode\u001b[38;5;241m.\u001b[39mBEAM_SAMPLE, GenerationMode\u001b[38;5;241m.\u001b[39mBEAM_SEARCH):\n\u001b[1;32m   
1770\u001b[0m     \u001b[38;5;66;03m# 11. prepare logits warper\u001b[39;00m\n\u001b[1;32m   1771\u001b[0m     prepared_logits_warper \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m   1772\u001b[0m         \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_logits_warper(generation_config) \u001b[38;5;28;01mif\u001b[39;00m generation_config\u001b[38;5;241m.\u001b[39mdo_sample \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m   1773\u001b[0m     )\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/transformers/generation/utils.py:2392\u001b[0m, in \u001b[0;36mGenerationMixin._sample\u001b[0;34m(self, input_ids, logits_processor, stopping_criteria, generation_config, synced_gpus, streamer, logits_warper, **model_kwargs)\u001b[0m\n\u001b[1;32m   2389\u001b[0m unfinished_sequences \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mones(batch_size, dtype\u001b[38;5;241m=\u001b[39mtorch\u001b[38;5;241m.\u001b[39mlong, device\u001b[38;5;241m=\u001b[39minput_ids\u001b[38;5;241m.\u001b[39mdevice)\n\u001b[1;32m   2390\u001b[0m model_kwargs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_initial_cache_position(input_ids, model_kwargs)\n\u001b[0;32m-> 2392\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_has_unfinished_sequences\u001b[49m\u001b[43m(\u001b[49m\u001b[43mthis_peer_finished\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msynced_gpus\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdevice\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minput_ids\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdevice\u001b[49m\u001b[43m)\u001b[49m:\n\u001b[1;32m   2393\u001b[0m     \u001b[38;5;66;03m# prepare model inputs\u001b[39;00m\n\u001b[1;32m   2394\u001b[0m     model_inputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mprepare_inputs_for_generation(input_ids, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mmodel_kwargs)\n\u001b[1;32m   2396\u001b[0m     \u001b[38;5;66;03m# forward pass to get next token\u001b[39;00m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/transformers/generation/utils.py:1922\u001b[0m, in \u001b[0;36mGenerationMixin._has_unfinished_sequences\u001b[0;34m(self, this_peer_finished, synced_gpus, device)\u001b[0m\n\u001b[1;32m   1920\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m this_peer_finished_flag\u001b[38;5;241m.\u001b[39mitem() \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0.0\u001b[39m:\n\u001b[1;32m   1921\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[0;32m-> 1922\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m this_peer_finished:\n\u001b[1;32m   1923\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m   1924\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m\n",
+      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
+     ]
+    }
+   ],
+   "source": [
+    "\n",
+    "\n",
+    "\n",
+    "def generate(model, tokenizer, prompt, max_length=1024*4):\n",
+    "    chat = [\n",
+    "        {\"role\":\"user\", \"content\":prompt},\n",
+    "    ]\n",
+    "    inputs = tokenizer.apply_chat_template(chat, tokenize = False, add_generation_prompt = True)\n",
+    "    #add bos token\n",
+    "    inputs = tokenizer.bos_token + inputs\n",
+    "    print(\"inputs:\", inputs)\n",
+    "    inputs = tokenizer.encode(inputs, add_special_tokens=True, return_tensors=\"pt\")\n",
+    "    print(\"inputs encoded:\", inputs)\n",
+    "    # Move inputs to GPU\n",
+    "    inputs = inputs.to(\"cuda\")\n",
+    "    \n",
+    "    outputs = model.generate(input_ids=inputs, max_new_tokens = max_length, min_new_tokens = 500)\n",
+    "    #decode the outputs\n",
+    "    outputs = tokenizer.decode(outputs[0], skip_special_tokens=False)\n",
+    "    return outputs\n",
+    "\n",
+    "\n",
+    "\n",
+    "prompt = \"Write a story about discovering a lost manuscript. It can be from a famous (or infamous) author, or an unknown one.\"\n",
+    "print(\"Prompt:\", prompt)\n",
+    "outputs = generate(model, tokenizer, prompt)\n",
+    "print('written by the model:', model_path)  \n",
+    "print(\"Generated story:\", outputs)\n",
+    "print(\"Length of the generated story:\", len(outputs.split()))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "20c32f2e-0da4-446c-a722-74ebef7eb508",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'model/SFTmodels/gemma-2b_sftm3genre10vast'"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "save_path = 'model/SFTmodels/' +model_name.split('/')[-2] + '_sft' + mode + split_by + str(lease_likes) + suffix\n",
+    "save_path"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "859e0d8d-e677-4fca-981c-bca2590f2250",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'<pad>'"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "478d07be-fbfc-4ce1-841a-9345ff2a1cbd",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

.ipynb_checkpoints/Untitled1-checkpoint.ipynb ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+ "cells": [],
+ "metadata": {},
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

.ipynb_checkpoints/dataloader-checkpoint.py ADDED Viewed

	@@ -0,0 +1,295 @@

+from datasets import Dataset, DatasetDict
+import pandas as pd
+import numpy as np
+import glob
+from sklearn.model_selection import train_test_split
+import re
+datapath = '/cluster/work/lawecon/Work/penghao/dataset/stories/'
+pairpath = '../../../work/lawecon/Work/penghao/pairs.csv'
+#3600 ->time lags
+class StoryPairDataset(Dataset):
+    def __init__(self, datapath, pairpath, tokenizer, task, used_dataset_size=-1, train_test_split=0.1,
+                 split_by='random',
+                 max_len=4096*2, mode='m3', max_time_window=3000, least_likes=5, margin=True):
+        self.datapath = datapath
+        print(self.datapath)
+        self.train_test_split = train_test_split
+        self.pairpath = pairpath
+        self.tokenizer = tokenizer
+        self.max_len = max_len
+        self.split_by = split_by
+        self.least_likes = least_likes
+        self.max_time_window = max_time_window
+        self.used_dataset_size = used_dataset_size
+        if mode == 'm2':
+            self.max_time_window = 12009600
+        else:
+            self.max_time_window = max_time_window
+        self.pair = self.load_pair()
+        self.task = task
+        self.margin = margin
+        self.stories = self.load_stories(self.datapath)
+        print(self.stories.columns)
+        print(len(self.stories))
+        # turn df into dataset
+        # self.dataset = datasets.Dataset.from_pandas(self.df)
+        self.train, self.test = self.train_test_split__()
+        self.train = self.marginInclude(self.train)
+        self.test = self.marginInclude(self.test)
+        # combine train and test to a single dataset, before train and test
+        self.dataset = self.make_dataset()
+        print('current setting mode is ', mode)
+        print('currnet setting split_by is ', split_by)
+        print('current setting least_likes is ', least_likes)
+    def load_stories(self, path):
+        stories = pd.DataFrame()
+        #print(f"Reading stories from {path}...")
+        for file in glob.glob(path + '*.csv'):
+            #print(f"Reading {file}...")
+            try:
+                # Read the CSV file into a DataFrame
+                df = pd.read_csv(file)
+                # Check if the DataFrame is empty or not
+                if df.empty:
+                    print(f"Warning: {file} is empty or not readable.")
+                    continue
+                # Concatenate the DataFrames
+                stories = pd.concat([stories, df], ignore_index=True)
+            except pd.errors.EmptyDataError:
+                # print(f"Error: {file} is empty or not readable.")
+                pass
+            except pd.errors.ParserError:
+                print(f"Error: {file} cannot be parsed.")
+            except Exception as e:
+                print(f"Error: An unexpected error occurred while processing {file}. Details: {str(e)}")
+        # contain Index(['prompt_id', 'prompt', 'story_id', 'story_title', 'story_author', 'story_url', 'link', 'genre', 'is_sensitive', 'categories', 'likes', 'story_text', 'posted_date', 'comments'], dtype='object')
+        return stories
+    def load_pair(self):
+        pair = pd.read_csv(self.pairpath)
+        # contain the colums of prompt_id, story1_id, story2_id, rel, time_lag, least_likes
+        pair = pair[pair['time_lag'] <= self.max_time_window]
+        print('the max of tima lag is ', pair['time_lag'].max())
+        pair = pair[pair['least_likes'] >= self.least_likes]
+        # swap the order of story1 and story2 if rel is negative, and makes rel positive
+        pair.loc[pair['rel'] < 0, ['story1_id', 'story2_id']] = pair.loc[
+            pair['rel'] < 0, ['story2_id', 'story1_id']].values
+        pair['rel'] = abs(pair['rel'])
+        # filter the pair if they have same story id
+        pair = pair[pair['story1_id'] != pair['story2_id']]
+        if self.used_dataset_size == -1:
+            self.used_dataset_size = len(pair)
+        else:
+            pair = pair.sample(n=self.used_dataset_size)
+        print('the total number of pairs is ', len(pair))
+        # remove the duplicate pairs
+        pair = pair.drop_duplicates(subset=['story1_id', 'story2_id'])
+        #remove the rel = 0
+        pair = pair[pair['rel'] != 0]
+        print('the number of effective pairs is ', len(pair))
+        return pair
+    def marginInclude(self, df):
+        if self.margin:
+            # drop the column of rel
+            df = df.drop(columns=['rel'])
+        else:
+            # rename rel to margin
+            df = df.rename(columns={'rel': 'margin'})
+        return df
+    def train_test_split__(self):
+        '''
+        split the pairs into train and test set
+        :return:
+        '''
+        test_size = round(len(self.pair) * self.train_test_split)
+        if self.split_by == 'time':
+            # give the pair the information of year according to the story_id
+            self.stories['posted_date'] = pd.to_datetime(self.stories['posted_date'])
+            #convert datetime64[ns] to comparable format, e.g.  2021-04-27 23:29:00 -> 20210427
+            self.stories['posted_date'] = self.stories['posted_date'].dt.strftime('%Y%m%d')
+            # the time after 2022 is test set
+            test = self.pair[self.pair['story1_id'].apply(lambda x: int(self.stories[self.stories['story_id'] == x]['posted_date'].values[0]) > 20220000)]
+            train = self.pair[self.pair['story1_id'].apply(lambda x: int(self.stories[self.stories['story_id'] == x]['posted_date'].values[0]) <= 20220000)]
+            print('the number of test set is ', len(test))
+            print('the number of train set is ', len(train))
+            print('the ratio of test set is ', len(test) / (len(test) + len(train)))
+        elif self.split_by == 'random':
+            train, test = train_test_split(self.pair, test_size=self.train_test_split)
+            # covert to huggingface dataset
+        elif self.split_by == 'genre':
+            # count the number of pairs for each category
+            # give the pair the information of category according to the story_id
+            self.pair['genre'] = self.pair['story1_id'].apply(
+                lambda x: self.stories[self.stories['story_id'] == x]['genre'].values[0])
+            genre = {}
+            for c in self.pair['genre'].unique():
+                genre[c] = len(self.pair[self.pair['genre'] == c])
+            # select the category to nearest to 10 per cent of the total
+            genre = dict(sorted(genre.items(), key=lambda item: item[1], reverse=True))#sort the genre by the number of pairs from high to low
+            print(genre)
+            total = sum(genre.values())
+            #select the close genre to 10% of the total
+            test_genre = []
+            test_count = 0
+            while test_count < total * self.train_test_split:
+                test_genre.append(list(genre.keys())[0])
+                test_count += genre[list(genre.keys())[0]]
+                del genre[list(genre.keys())[0]]
+                if test_count + genre[list(genre.keys())[0]] > total * self.train_test_split:
+                    break
+            test = self.pair[self.pair['genre'].apply(lambda x: x in test_genre)]
+            train = self.pair[self.pair['genre'].apply(lambda x: x not in test_genre)]
+            print('the genre of test set is ', test_genre)
+            print('the percentage of test set is ', test_count / total,'where total is ', total)
+        elif self.split_by == 'chaos':
+            #instead using the pairs, we randomly assign the story id to replace the old story id from that prompt
+            for i in range(len(self.pair)):
+                self.pair.at[i, 'story1_id'] = np.random.choice(self.stories[self.stories['prompt_id'] == self.pair.at[i, 'prompt_id']]['story_id'].values)
+                self.pair.at[i, 'story2_id'] = np.random.choice(self.stories[self.stories['prompt_id'] == self.pair.at[i, 'prompt_id']]['story_id'].values)
+            train, test = train_test_split(self.pair, test_size=self.train_test_split)
+        return train, test
+    def apply_template_to_text(self, row):
+        # Ensure proper access to columns in pair
+        prompt_id, story1_id, story2_id = row[['prompt_id', 'story1_id', 'story2_id']]
+        # Extract text based on IDs
+        chosen_prompt = self.stories[self.stories['prompt_id'] == prompt_id]['prompt']
+        chosen_prompt = chosen_prompt.values[0]
+        chosen_story = self.stories[self.stories['story_id'] == story1_id]['story_title'].values[0] + '/n' + \
+                       self.stories[self.stories['story_id'] == story1_id]['story_text'].values[0]
+        rejected_prompt = self.stories[self.stories['prompt_id'] == prompt_id]['prompt']
+        rejected_prompt = rejected_prompt.values[0]
+        rejected_story = self.stories[self.stories['story_id'] == story2_id]['story_title'].values[0] + '/n' + \
+                            self.stories[self.stories['story_id'] == story2_id]['story_text'].values[0]
+        # Create chosen and rejected text dictionaries
+        chosen_text = [{'role': 'user', 'content': chosen_prompt},
+                       {'role': 'assistant', 'content': chosen_story}]
+        rejected_text = [{'role': 'user', 'content': rejected_prompt},
+                         {'role': 'assistant', 'content': rejected_story}]
+        # Apply tokenizer to chosen and rejected text
+        chosen_text = self.tokenizer.apply_chat_template(chosen_text, tokenize=False)
+        rejected_text = self.tokenizer.apply_chat_template(rejected_text, tokenize=False)
+        res = {}
+        res['chosen_text'] = chosen_text
+        res['rejected_text'] = rejected_text
+        #add eos and bos token
+        res['chosen_text'] = self.tokenizer.bos_token + res['chosen_text'] + self.tokenizer.eos_token
+        res['rejected_text'] = self.tokenizer.bos_token + res['rejected_text'] + self.tokenizer.eos_token
+        res['text'] = chosen_text
+        #add eos and bos token
+        res['text'] = self.tokenizer.bos_token + res['text'] + self.tokenizer.eos_token
+        if 'gemma' in self.tokenizer.name_or_path:
+            split_words = '<|im_start|>assistant\n'
+        elif 'mistral' in self.tokenizer.name_or_path or 'llama' in self.tokenizer.name_or_path:
+            split_words = '[/INST]'
+        chosen_text_tmp = chosen_text.split(split_words)[-1]
+        prompt_text = chosen_text.replace(chosen_text_tmp, '')
+        chosen_text = chosen_text_tmp
+        rejected_text = rejected_text.split(split_words)[-1]
+        res['prompt'] = prompt_text
+        res['chosen'] = chosen_text
+        res['rejected'] = rejected_text
+        # add bos and eos token
+        res['prompt'] = self.tokenizer.bos_token + res['prompt']
+        res['chosen'] = res['chosen'] + self.tokenizer.eos_token
+        res['rejected'] = res['rejected'] + self.tokenizer.eos_token
+        return res
+    def convert_sft(self,df):
+        #collect all the story id in the pair
+        story_ids = list(set(df['story1_id'].values) | set(df['story2_id'].values))
+        #now make new train and test set as story_ids as story1_id and story2_id
+        df = pd.DataFrame()
+        df['story1_id'] = story_ids
+        df['story2_id'] = df['story1_id']
+        #reload stories
+        #self.stories = self.load_stories(self.datapath)
+        # get prompt_id from the pair
+        def get_prompt_id(x):
+            return self.stories[self.stories['story_id'] == x]['prompt_id'].values[0]
+        df['prompt_id'] = df['story1_id'].apply(lambda x: get_prompt_id(x))
+        return df
+    def make_dataset(self):
+        # reset the index
+        self.train.reset_index(drop=True, inplace=True)
+        self.test.reset_index(drop=True, inplace=True)
+        entries = []
+        if self.task == 'rm':
+            entries = ['chosen_text', 'rejected_text']
+        elif self.task == 'dpo':
+            entries = ['prompt', 'chosen', 'rejected']
+        elif self.task == 'sft':
+            self.train = self.convert_sft(self.train)
+            self.test = self.convert_sft(self.test)
+            entries = ['text']
+        print('the columns of train is ', self.train.columns)
+        for index, row in self.train.iterrows():
+            res = self.apply_template_to_text(row)
+            for e in entries:
+                self.train.at[index, e] = res[e]
+        for index, row in self.test.iterrows():
+            res = self.apply_template_to_text(row)
+            for e in entries:
+                self.test.at[index, e] = res[e]
+        print('the first example of train is ', self.train.iloc[0])
+        #since the we aggred on max_len = 8192, we need to filter this
+        if self.margin:
+            entries.append('margin')
+        train_dataset = Dataset.from_pandas(self.train[entries])
+        test_dataset = Dataset.from_pandas(self.test[entries])
+        return DatasetDict({'train': train_dataset, 'test': test_dataset})
+    def save_dataset(self, path):
+        '''
+        save the dataset to the readsy folder
+        :param path:
+        :return:
+        '''
+        self.dataset.save_to_disk('../' + path)

.ipynb_checkpoints/reward_modeling-checkpoint.py ADDED Viewed

	@@ -0,0 +1,158 @@

+# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+python examples/scripts/reward_modeling.py \
+    --model_name_or_path=facebook/opt-350m \
+    --output_dir="reward_modeling_anthropic_hh" \
+    --per_device_train_batch_size=16 \
+    --num_train_epochs=1 \
+    --gradient_accumulation_steps=2 \
+    --gradient_checkpointing=True \
+    --learning_rate=1.41e-5 \
+    --report_to="wandb" \
+    --remove_unused_columns=False \
+    --optim="adamw_torch" \
+    --logging_steps=10 \
+    --eval_strategy="steps" \
+    --eval_steps=500 \
+    --max_length=512 \
+"""
+import warnings
+import torch
+from datasets import load_dataset
+from tqdm import tqdm
+from transformers import AutoModelForSequenceClassification, AutoTokenizer, HfArgumentParser
+from trl import ModelConfig, RewardConfig, RewardTrainer, get_kbit_device_map, get_peft_config, get_quantization_config
+from dataclasses import dataclass, field
+from transformers import TrainingArguments
+print('imported')
+@dataclass
+class DatasetConfig:
+    reedsy_dataset: str = field(default=True, metadata={"help": "Path to the Reedsy dataset"})
+    datapath: str = field(default=None, metadata={"help": "Path to the dataset"})
+    pairpath: str = field(default=None, metadata={"help": "Path to the story pairs"})
+    split_by: str = field(default="random", metadata={"help": "How to split the dataset"})
+    dt_mode: str = field(default="m3", metadata={"help": "DT mode"})
+    dt_margin: bool = field(default=False, metadata={"help": "DT margin flag"})
+    time_window: int = field(default=3600, metadata={"help": "Time window for DT"})
+    used_dataset_size: int = field(default=-1, metadata={"help": "Size of the dataset to use"})
+tqdm.pandas()
+if __name__ == "__main__":
+    parser = HfArgumentParser((RewardConfig, ModelConfig, DatasetConfig))
+    config, model_config, dataset_config = parser.parse_args_into_dataclasses()
+    config.gradient_checkpointing_kwargs = dict(use_reentrant=False)
+    ################
+    # Model & Tokenizer
+    ################
+    torch_dtype = (
+        model_config.torch_dtype
+        if model_config.torch_dtype in ["auto", None]
+        else getattr(torch, model_config.torch_dtype)
+    )
+    quantization_config = get_quantization_config(model_config)
+    model_kwargs = dict(
+        revision=model_config.model_revision,
+        device_map=get_kbit_device_map() if quantization_config is not None else None,
+        quantization_config=quantization_config,
+    )
+    tokenizer = AutoTokenizer.from_pretrained(
+        model_config.model_name_or_path, trust_remote_code=model_config.trust_remote_code, use_fast=True
+    )
+    model = AutoModelForSequenceClassification.from_pretrained(
+        model_config.model_name_or_path, num_labels=1, trust_remote_code=model_config.trust_remote_code, **model_kwargs
+    )
+    if model_config.lora_task_type != "SEQ_CLS":
+        warnings.warn(
+            "You are using a `task_type` that is different than `SEQ_CLS` for PEFT. This will lead to silent bugs"
+            " Make sure to pass --lora_task_type SEQ_CLS when using this script."
+        )
+    ################
+    # Dataset
+    ################
+    if not dataset_config.reedsy_dataset:
+        raw_datasets = load_dataset(dataset_config.dataset_name)
+        train_dataset = raw_datasets[dataset_config.dataset_train_split]
+        eval_dataset = raw_datasets[dataset_config.dataset_test_split]
+    else:
+        from dataloader import StoryPairDataset
+        SPdataloader = StoryPairDataset(dataset_config.datapath,
+                                    dataset_config.pairpath,
+                                    tokenizer,
+                                    task='rm',
+                                    used_dataset_size=dataset_config.used_dataset_size,
+                                    train_test_split=0.1,
+                                    split_by=dataset_config.split_by,
+                                    max_len=4096,
+                                    mode= dataset_config.dt_mode,
+                                    max_time_window=dataset_config.time_window,
+                                    least_likes= 10,
+                                    margin=dataset_config.dt_margin)
+    print('dataset ready')
+    def preprocess_function(examples):
+        chosen_text = examples['chosen_text']
+        rejected_text = examples['rejected_text']
+        tokenized_input_chosen = tokenizer(chosen_text, truncation=True)
+        tokenized_input_rejected = tokenizer(rejected_text, truncation=True)
+        examples['input_ids_chosen'] = tokenized_input_chosen['input_ids']
+        examples['attention_mask_chosen'] = tokenized_input_chosen['attention_mask']
+        examples['input_ids_rejected'] = tokenized_input_rejected['input_ids']
+        examples['attention_mask_rejected'] = tokenized_input_rejected['attention_mask']
+        return examples
+    train_dataset = SPdataloader.dataset['train'].map(preprocess_function,num_proc=32)
+    eval_dataset = SPdataloader.dataset['test'].map(preprocess_function,num_proc=32)
+    # Preprocess the dataset and filter out examples that are longer than args.max_length
+    # raw_datasets = raw_datasets.map(
+    #     preprocess_function,
+    #     batched=True,
+    #     num_proc=4,
+    # )
+    # train_dataset = dataloader.dataset['train'].map(preprocess_function,num_proc=32)
+    # eval_dataset = dataloader.dataset['test'].map(preprocess_function,num_proc=32)
+    print('dataset ready')
+    #print('one example:', train_dataset[0])
+    ################
+    # Training
+    ################
+    trainer = RewardTrainer(
+        model=model,
+        tokenizer=tokenizer,
+        args=config,
+        train_dataset=train_dataset,
+        eval_dataset=eval_dataset,
+        peft_config=get_peft_config(model_config),
+    )
+    trainer.train()
+    saving_path = '/workspace/RMmodels/' + model_config.model_name_or_path.split('/')[-1] + str(dataset_config.time_window)
+    trainer.save_model(saving_path)
+    trainer.push_to_hub()
+    metrics = trainer.evaluate()
+    trainer.log_metrics("eval", metrics)
+    print(metrics)

.ipynb_checkpoints/rm-checkpoint.ipynb ADDED Viewed

	@@ -0,0 +1,565 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "41059be2-24d7-406d-9202-0704d9ca3615",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "import warnings\n",
+    "from peft import PeftModel, PeftConfig, get_peft_model, LoraConfig\n",
+    "from transformers import AutoModelForSequenceClassification, AutoTokenizer\n",
+    "from dataloader import StoryPairDataset\n",
+    "from trl import RewardTrainer, RewardConfig\n",
+    "import os"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "65d882eb-eea1-4103-858c-0254f12971af",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "datapath = 'readsy/stories/'\n",
+    "pairpath = 'readsy/pairs/readsy_story_pairs0407.csv'\n",
+    "model_name = 'model/SFTmodels/gemma-2b_sftm3genre10vast/'\n",
+    "base_model = 'model/gemma/gemma-2b/'\n",
+    "mode='m3' if 'm3' in model_name else 'm2'\n",
+    "if 'random' in model_name:\n",
+    "    split_by = 'random'\n",
+    "elif 'time' in model_name:\n",
+    "    split_by = 'time'\n",
+    "else:\n",
+    "    split_by = 'random'\n",
+    "lease_likes = 10\n",
+    "max_seq_length = 2048*2 # Choose any! We auto support RoPE Scaling internally!\n",
+    "dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+\n",
+    "load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.\n",
+    "margin = False\n",
+    "save_path = 'model/reward_models/' +model_name.split('/')[-2] + '_rm'\n",
+    "if margin:\n",
+    "    save_path += 'margin'\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "e78f9b52-6a59-4f1a-9923-d521bba02630",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.\n",
+      "`low_cpu_mem_usage` was None, now set to True since model is quantized.\n",
+      "`config.hidden_act` is ignored, you should use `config.hidden_activation` instead.\n",
+      "Gemma's activation function will be set to `gelu_pytorch_tanh`. Please, use\n",
+      "`config.hidden_activation` if you want to override this behaviour.\n",
+      "See https://github.com/huggingface/transformers/pull/29402 for more details.\n",
+      "Some weights of GemmaForSequenceClassification were not initialized from the model checkpoint at unsloth/gemma-2b and are newly initialized: ['score.weight']\n",
+      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
+     ]
+    }
+   ],
+   "source": [
+    "model = AutoModelForSequenceClassification.from_pretrained('unsloth/gemma-2b', num_labels = 1, load_in_4bit=True)\n",
+    "model = PeftModel.from_pretrained(model, model_name)\n",
+    "tokenizer = AutoTokenizer.from_pretrained(base_model)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "0ffc0f5a-0974-4b4a-b48a-34b5bd0b7748",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "peft_config = LoraConfig(\n",
+    "    lora_alpha= 16,\n",
+    "    lora_dropout= 0,\n",
+    "    r= 16,\n",
+    "    bias= \"none\",\n",
+    "    task_type= \"SEQ_CLS\",\n",
+    "    target_modules=[\n",
+    "    \"q_proj\",\n",
+    "    \"up_proj\",\n",
+    "    \"o_proj\",\n",
+    "    \"k_proj\",\n",
+    "    \"down_proj\",\n",
+    "    \"gate_proj\",\n",
+    "    \"v_proj\"],\n",
+    ")\n",
+    "model = get_peft_model(model, peft_config)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "5360a7dc-4e72-4b4a-98dc-dedddaf1f73a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "training_args = RewardConfig(\n",
+    "    num_train_epochs= 3,\n",
+    "    per_device_train_batch_size= 1,\n",
+    "    gradient_accumulation_steps= 1,\n",
+    "    optim = \"adamw_8bit\",\n",
+    "    logging_steps= 5,\n",
+    "    save_strategy= \"epoch\",\n",
+    "    learning_rate= 1e-4, #0 -> test if the model is trainable\n",
+    "    weight_decay= 0.01,\n",
+    "    warmup_steps= 5,\n",
+    "    fp16= not torch.cuda.is_bf16_supported(),\n",
+    "    bf16= torch.cuda.is_bf16_supported(),\n",
+    "    max_grad_norm= 0.3,\n",
+    "    lr_scheduler_type= \"cosine\",\n",
+    "    disable_tqdm= True,\n",
+    "    #report_to= \"wandb\",\n",
+    "    dataloader_drop_last= True,\n",
+    "    max_length= 1024*4,\n",
+    "    output_dir = save_path,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "57742386-f1d2-4ce3-86a6-db9a67ec8e1c",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "the total number of pairs is  100\n",
+      "the number of effective pairs is  84\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "No chat template is set for this tokenizer, falling back to a default class-level template. This is very error-prone, because models are often trained with templates different from the class default! Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which point any code depending on them will stop working. We recommend setting a valid chat template before then to ensure that this model continues working without issues.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Index(['prompt_id', 'prompt', 'story_id', 'story_title', 'story_author',\n",
+      "       'story_url', 'link', 'genre', 'is_sensitive', 'categories', 'likes',\n",
+      "       'story_text', 'posted_date', 'comments'],\n",
+      "      dtype='object')\n",
+      "the columns of train is  Index(['prompt_id', 'story1_id', 'story2_id', 'time_lag', 'least_likes'], dtype='object')\n",
+      "the first example of train is  prompt_id                                              prompt_0792\n",
+      "story1_id                                                   15ginj\n",
+      "story2_id                                                   h7yder\n",
+      "time_lag                                                    2100.0\n",
+      "least_likes                                                     11\n",
+      "chosen_text      <bos><|im_start|>user\\nWrite a story about a c...\n",
+      "rejected_text    <bos><|im_start|>user\\nWrite a story about a c...\n",
+      "Name: 0, dtype: object\n"
+     ]
+    }
+   ],
+   "source": [
+    "dataloader = StoryPairDataset(datapath,\n",
+    "                              pairpath,\n",
+    "                              tokenizer,\n",
+    "                              task='rm',\n",
+    "                              used_dataset_size=100,\n",
+    "                              train_test_split=0.1,\n",
+    "                              split_by=split_by,\n",
+    "                              max_len=4096,\n",
+    "                              mode= mode,\n",
+    "                              max_time_window=3600,\n",
+    "                              least_likes= lease_likes,\n",
+    "                              margin= margin)\n",
+    "#map data columns ['chosen_text', 'rejected_text'] into `input_ids_chosen`, `attention_mask_chosen`, `input_ids_rejected` and `attention_mask_rejected` with the tokenizer\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "c11ab177-6e35-44a7-8fa5-e38aca3c7404",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "485a4dc610fd4fb2a608f12326138c4b",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Map (num_proc=32):   0%|          | 0/75 [00:00<?, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n",
+      "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n",
+      "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n",
+      "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n",
+      "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n",
+      "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n",
+      "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n",
+      "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n",
+      "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n",
+      "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n",
+      "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n",
+      "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n",
+      "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n",
+      "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n",
+      "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n",
+      "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n",
+      "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n",
+      "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n",
+      "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n",
+      "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n",
+      "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n",
+      "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n",
+      "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n",
+      "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n",
+      "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n",
+      "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n",
+      "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n",
+      "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n",
+      "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n",
+      "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n",
+      "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n",
+      "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n",
+      "num_proc must be <= 9. Reducing num_proc to 9 for dataset of size 9.\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "9174055e52f343c1800c887572e61b6e",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Map (num_proc=9):   0%|          | 0/9 [00:00<?, ? examples/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n",
+      "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n",
+      "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n",
+      "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n",
+      "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n",
+      "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n",
+      "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n",
+      "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n",
+      "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n"
+     ]
+    }
+   ],
+   "source": [
+    "def preprocess_function(examples):\n",
+    "    chosen_text = examples['chosen_text']\n",
+    "    rejected_text = examples['rejected_text']\n",
+    "    tokenized_input_chosen = tokenizer(chosen_text, truncation=True)\n",
+    "    tokenized_input_rejected = tokenizer(rejected_text, truncation=True)\n",
+    "    examples['input_ids_chosen'] = tokenized_input_chosen['input_ids']\n",
+    "    examples['attention_mask_chosen'] = tokenized_input_chosen['attention_mask']\n",
+    "    examples['input_ids_rejected'] = tokenized_input_rejected['input_ids']\n",
+    "    examples['attention_mask_rejected'] = tokenized_input_rejected['attention_mask']\n",
+    "    return examples\n",
+    "\n",
+    "traindata = dataloader.dataset['train'].map(preprocess_function,num_proc=32)\n",
+    "testdata = dataloader.dataset['test'].map(preprocess_function,num_proc=32)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "3d78d559-0a9a-48d6-be0d-0c3624e649ad",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from dataclasses import dataclass, field\n",
+    "from typing import Any, Dict, List, Optional, Union\n",
+    "from transformers.utils import PaddingStrategy\n",
+    "@dataclass\n",
+    "class RewardDataCollatorWithPadding:\n",
+    "    tokenizer: AutoTokenizer\n",
+    "    padding: Union[bool, str, PaddingStrategy] = True\n",
+    "    max_length: Optional[int] = None\n",
+    "    pad_to_multiple_of: Optional[int] = None\n",
+    "    return_tensors: str = \"pt\"\n",
+    "\n",
+    "    def __call__(self, features: List[Dict[str, Any]]) -> Dict[str, Any]:\n",
+    "        merged_features = []\n",
+    "        for feature in features:\n",
+    "            merged_features.append(\n",
+    "                {\n",
+    "                    \"input_ids\": feature[\"input_ids_chosen\"],\n",
+    "                    \"attention_mask\": feature[\"attention_mask_chosen\"],\n",
+    "                }\n",
+    "            )\n",
+    "            merged_features.append(\n",
+    "                {\n",
+    "                    \"input_ids\": feature[\"input_ids_rejected\"],\n",
+    "                    \"attention_mask\": feature[\"attention_mask_rejected\"],\n",
+    "                }\n",
+    "            )\n",
+    "        batch = self.tokenizer.pad(\n",
+    "            merged_features,\n",
+    "            padding=self.padding,\n",
+    "            max_length=self.max_length,\n",
+    "            pad_to_multiple_of=self.pad_to_multiple_of,\n",
+    "            return_tensors=self.return_tensors,\n",
+    "        )\n",
+    "        batch = {\n",
+    "            \"input_ids\": batch[\"input_ids\"],\n",
+    "            \"attention_mask\": batch[\"attention_mask\"],\n",
+    "            \"return_loss\": True,\n",
+    "        }\n",
+    "        return batch"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "bd267f29-20bd-496c-8f07-08bc76eeb583",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/opt/conda/lib/python3.10/site-packages/trl/trainer/reward_trainer.py:189: UserWarning: When using RewardDataCollatorWithPadding, you should set `remove_unused_columns=False` in your RewardConfig we have set it for you, but you should do it yourself in the future.\n",
+      "  warnings.warn(\n",
+      "You're using a GemmaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n",
+      "/opt/conda/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:2717: UserWarning: `max_length` is ignored when `padding`=`True` and there is no truncation strategy. To pad to max length, use `padding='max_length'`.\n",
+      "  warnings.warn(\n",
+      "/opt/conda/lib/python3.10/site-packages/bitsandbytes/nn/modules.py:426: UserWarning: Input type into Linear4bit is torch.float16, but bnb_4bit_compute_dtype=torch.float32 (default). This will lead to slow inference or training speed.\n",
+      "  warnings.warn(\n",
+      "Could not estimate the number of tokens of the input, floating-point operations will not be computed\n",
+      "/opt/conda/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:2717: UserWarning: `max_length` is ignored when `padding`=`True` and there is no truncation strategy. To pad to max length, use `padding='max_length'`.\n",
+      "  warnings.warn(\n"
+     ]
+    },
+    {
+     "ename": "OutOfMemoryError",
+     "evalue": "CUDA out of memory. Tried to allocate 132.00 MiB. GPU 0 has a total capacity of 23.65 GiB of which 38.69 MiB is free. Process 2450213 has 23.61 GiB memory in use. Of the allocated memory 22.91 GiB is allocated by PyTorch, and 243.12 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mOutOfMemoryError\u001b[0m                          Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[12], line 10\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtrl\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m RewardTrainer\n\u001b[1;32m      2\u001b[0m trainer \u001b[38;5;241m=\u001b[39m RewardTrainer(\n\u001b[1;32m      3\u001b[0m     model \u001b[38;5;241m=\u001b[39m model,\n\u001b[1;32m      4\u001b[0m     args \u001b[38;5;241m=\u001b[39m training_args,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m      8\u001b[0m     peft_config\u001b[38;5;241m=\u001b[39m peft_config\n\u001b[1;32m      9\u001b[0m )\n\u001b[0;32m---> 10\u001b[0m \u001b[43mtrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     12\u001b[0m trainer\u001b[38;5;241m.\u001b[39msave_model(save_path)\n\u001b[1;32m     13\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmodel saved at\u001b[39m\u001b[38;5;124m'\u001b[39m, save_path)\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/transformers/trainer.py:1885\u001b[0m, in \u001b[0;36mTrainer.train\u001b[0;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m   1883\u001b[0m         hf_hub_utils\u001b[38;5;241m.\u001b[39menable_progress_bars()\n\u001b[1;32m   1884\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1885\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43minner_training_loop\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1886\u001b[0m \u001b[43m        \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1887\u001b[0m \u001b[43m        \u001b[49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1888\u001b[0m \u001b[43m        \u001b[49m\u001b[43mtrial\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtrial\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1889\u001b[0m \u001b[43m        \u001b[49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1890\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/transformers/trainer.py:2216\u001b[0m, in \u001b[0;36mTrainer._inner_training_loop\u001b[0;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m   2213\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcallback_handler\u001b[38;5;241m.\u001b[39mon_step_begin(args, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcontrol)\n\u001b[1;32m   2215\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maccelerator\u001b[38;5;241m.\u001b[39maccumulate(model):\n\u001b[0;32m-> 2216\u001b[0m     tr_loss_step \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtraining_step\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   2218\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m   2219\u001b[0m     args\u001b[38;5;241m.\u001b[39mlogging_nan_inf_filter\n\u001b[1;32m   2220\u001b[0m     \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_torch_xla_available()\n\u001b[1;32m   2221\u001b[0m     \u001b[38;5;129;01mand\u001b[39;00m (torch\u001b[38;5;241m.\u001b[39misnan(tr_loss_step) \u001b[38;5;129;01mor\u001b[39;00m torch\u001b[38;5;241m.\u001b[39misinf(tr_loss_step))\n\u001b[1;32m   2222\u001b[0m ):\n\u001b[1;32m   2223\u001b[0m     \u001b[38;5;66;03m# if loss is nan or inf simply add the average of previous logged losses\u001b[39;00m\n\u001b[1;32m   2224\u001b[0m     tr_loss \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m tr_loss \u001b[38;5;241m/\u001b[39m (\u001b[38;5;241m1\u001b[39m \u001b[38;5;241m+\u001b[39m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mglobal_step \u001b[38;5;241m-\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_globalstep_last_logged)\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/transformers/trainer.py:3238\u001b[0m, in \u001b[0;36mTrainer.training_step\u001b[0;34m(self, model, inputs)\u001b[0m\n\u001b[1;32m   3235\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m loss_mb\u001b[38;5;241m.\u001b[39mreduce_mean()\u001b[38;5;241m.\u001b[39mdetach()\u001b[38;5;241m.\u001b[39mto(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39margs\u001b[38;5;241m.\u001b[39mdevice)\n\u001b[1;32m   3237\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcompute_loss_context_manager():\n\u001b[0;32m-> 3238\u001b[0m     loss \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcompute_loss\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   3240\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m inputs\n\u001b[1;32m   3241\u001b[0m torch\u001b[38;5;241m.\u001b[39mcuda\u001b[38;5;241m.\u001b[39mempty_cache()\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/trl/trainer/reward_trainer.py:228\u001b[0m, in \u001b[0;36mRewardTrainer.compute_loss\u001b[0;34m(self, model, inputs, return_outputs)\u001b[0m\n\u001b[1;32m    222\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39muse_reward_data_collator:\n\u001b[1;32m    223\u001b[0m     warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[1;32m    224\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe current compute_loss is implemented for RewardDataCollatorWithPadding,\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    225\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m if you are using a custom data collator make sure you know what you are doing or\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    226\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m implement your own compute_loss method.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    227\u001b[0m     )\n\u001b[0;32m--> 228\u001b[0m rewards_chosen \u001b[38;5;241m=\u001b[39m \u001b[43mmodel\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    229\u001b[0m \u001b[43m    \u001b[49m\u001b[43minput_ids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minputs\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43minput_ids_chosen\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    230\u001b[0m \u001b[43m    \u001b[49m\u001b[43mattention_mask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minputs\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mattention_mask_chosen\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    231\u001b[0m \u001b[43m    
\u001b[49m\u001b[43mreturn_dict\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m    232\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlogits\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m    233\u001b[0m rewards_rejected \u001b[38;5;241m=\u001b[39m model(\n\u001b[1;32m    234\u001b[0m     input_ids\u001b[38;5;241m=\u001b[39minputs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minput_ids_rejected\u001b[39m\u001b[38;5;124m\"\u001b[39m],\n\u001b[1;32m    235\u001b[0m     attention_mask\u001b[38;5;241m=\u001b[39minputs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mattention_mask_rejected\u001b[39m\u001b[38;5;124m\"\u001b[39m],\n\u001b[1;32m    236\u001b[0m     return_dict\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[1;32m    237\u001b[0m )[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlogits\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m    238\u001b[0m \u001b[38;5;66;03m# calculate loss, optionally modulate with margin\u001b[39;00m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1511\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1509\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)  \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m   1510\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1511\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1520\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1515\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m   1516\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m   1517\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m   1518\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m   1519\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1520\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1522\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m   1523\u001b[0m     result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/accelerate/utils/operations.py:822\u001b[0m, in \u001b[0;36mconvert_outputs_to_fp32.<locals>.forward\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    821\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m--> 822\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmodel_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/accelerate/utils/operations.py:810\u001b[0m, in \u001b[0;36mConvertOutputsToFp32.__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m    809\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m--> 810\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m convert_to_fp32(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m)\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/amp/autocast_mode.py:16\u001b[0m, in \u001b[0;36mautocast_decorator.<locals>.decorate_autocast\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m     13\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m     14\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdecorate_autocast\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m     15\u001b[0m     \u001b[38;5;28;01mwith\u001b[39;00m autocast_instance:\n\u001b[0;32m---> 16\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/accelerate/utils/operations.py:822\u001b[0m, in \u001b[0;36mconvert_outputs_to_fp32.<locals>.forward\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    821\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m--> 822\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmodel_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/accelerate/utils/operations.py:810\u001b[0m, in \u001b[0;36mConvertOutputsToFp32.__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m    809\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m--> 810\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m convert_to_fp32(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m)\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/amp/autocast_mode.py:16\u001b[0m, in \u001b[0;36mautocast_decorator.<locals>.decorate_autocast\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m     13\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m     14\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdecorate_autocast\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m     15\u001b[0m     \u001b[38;5;28;01mwith\u001b[39;00m autocast_instance:\n\u001b[0;32m---> 16\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/accelerate/utils/operations.py:822\u001b[0m, in \u001b[0;36mconvert_outputs_to_fp32.<locals>.forward\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    821\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m--> 822\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmodel_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/accelerate/utils/operations.py:810\u001b[0m, in \u001b[0;36mConvertOutputsToFp32.__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m    809\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m--> 810\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m convert_to_fp32(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m)\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/amp/autocast_mode.py:16\u001b[0m, in \u001b[0;36mautocast_decorator.<locals>.decorate_autocast\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m     13\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m     14\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdecorate_autocast\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m     15\u001b[0m     \u001b[38;5;28;01mwith\u001b[39;00m autocast_instance:\n\u001b[0;32m---> 16\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/peft/peft_model.py:1238\u001b[0m, in \u001b[0;36mPeftModelForSequenceClassification.forward\u001b[0;34m(self, input_ids, attention_mask, inputs_embeds, labels, output_attentions, output_hidden_states, return_dict, task_ids, **kwargs)\u001b[0m\n\u001b[1;32m   1236\u001b[0m         \u001b[38;5;28;01mif\u001b[39;00m peft_config\u001b[38;5;241m.\u001b[39mpeft_type \u001b[38;5;241m==\u001b[39m PeftType\u001b[38;5;241m.\u001b[39mPOLY:\n\u001b[1;32m   1237\u001b[0m             kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtask_ids\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m task_ids\n\u001b[0;32m-> 1238\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbase_model\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1239\u001b[0m \u001b[43m            \u001b[49m\u001b[43minput_ids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minput_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1240\u001b[0m \u001b[43m            \u001b[49m\u001b[43mattention_mask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mattention_mask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1241\u001b[0m \u001b[43m            \u001b[49m\u001b[43minputs_embeds\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minputs_embeds\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1242\u001b[0m \u001b[43m            \u001b[49m\u001b[43mlabels\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlabels\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1243\u001b[0m \u001b[43m            \u001b[49m\u001b[43moutput_attentions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moutput_attentions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1244\u001b[0m \u001b[43m            \u001b[49m\u001b[43moutput_hidden_states\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moutput_hidden_states\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   
1245\u001b[0m \u001b[43m            \u001b[49m\u001b[43mreturn_dict\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_dict\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1246\u001b[0m \u001b[43m            \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1247\u001b[0m \u001b[43m        \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1249\u001b[0m batch_size \u001b[38;5;241m=\u001b[39m _get_batch_size(input_ids, inputs_embeds)\n\u001b[1;32m   1250\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m attention_mask \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m   1251\u001b[0m     \u001b[38;5;66;03m# concat prompt attention mask\u001b[39;00m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1511\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1509\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)  \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m   1510\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1511\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1520\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1515\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m   1516\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m   1517\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m   1518\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m   1519\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1520\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1522\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m   1523\u001b[0m     result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/peft/tuners/tuners_utils.py:179\u001b[0m, in \u001b[0;36mBaseTuner.forward\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m    178\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs: Any, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: Any):\n\u001b[0;32m--> 179\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mforward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/peft/peft_model.py:1430\u001b[0m, in \u001b[0;36mPeftModelForCausalLM.forward\u001b[0;34m(self, input_ids, attention_mask, inputs_embeds, labels, output_attentions, output_hidden_states, return_dict, task_ids, **kwargs)\u001b[0m\n\u001b[1;32m   1428\u001b[0m     \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_enable_peft_forward_hooks(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m   1429\u001b[0m         kwargs \u001b[38;5;241m=\u001b[39m {k: v \u001b[38;5;28;01mfor\u001b[39;00m k, v \u001b[38;5;129;01min\u001b[39;00m kwargs\u001b[38;5;241m.\u001b[39mitems() \u001b[38;5;28;01mif\u001b[39;00m k \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mspecial_peft_forward_args}\n\u001b[0;32m-> 1430\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbase_model\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1431\u001b[0m \u001b[43m            \u001b[49m\u001b[43minput_ids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minput_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1432\u001b[0m \u001b[43m            \u001b[49m\u001b[43mattention_mask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mattention_mask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1433\u001b[0m \u001b[43m            \u001b[49m\u001b[43minputs_embeds\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minputs_embeds\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1434\u001b[0m \u001b[43m            \u001b[49m\u001b[43mlabels\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlabels\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1435\u001b[0m \u001b[43m            
\u001b[49m\u001b[43moutput_attentions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moutput_attentions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1436\u001b[0m \u001b[43m            \u001b[49m\u001b[43moutput_hidden_states\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moutput_hidden_states\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1437\u001b[0m \u001b[43m            \u001b[49m\u001b[43mreturn_dict\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_dict\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1438\u001b[0m \u001b[43m            \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1439\u001b[0m \u001b[43m        \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1441\u001b[0m batch_size \u001b[38;5;241m=\u001b[39m _get_batch_size(input_ids, inputs_embeds)\n\u001b[1;32m   1442\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m attention_mask \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m   1443\u001b[0m     \u001b[38;5;66;03m# concat prompt attention mask\u001b[39;00m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1511\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1509\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)  \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m   1510\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1511\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1520\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1515\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m   1516\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m   1517\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m   1518\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m   1519\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1520\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1522\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m   1523\u001b[0m     result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/peft/tuners/tuners_utils.py:179\u001b[0m, in \u001b[0;36mBaseTuner.forward\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m    178\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs: Any, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: Any):\n\u001b[0;32m--> 179\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mforward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/accelerate/hooks.py:166\u001b[0m, in \u001b[0;36madd_hook_to_module.<locals>.new_forward\u001b[0;34m(module, *args, **kwargs)\u001b[0m\n\u001b[1;32m    164\u001b[0m         output \u001b[38;5;241m=\u001b[39m module\u001b[38;5;241m.\u001b[39m_old_forward(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m    165\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 166\u001b[0m     output \u001b[38;5;241m=\u001b[39m \u001b[43mmodule\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_old_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    167\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m module\u001b[38;5;241m.\u001b[39m_hf_hook\u001b[38;5;241m.\u001b[39mpost_forward(module, output)\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/transformers/models/gemma/modeling_gemma.py:1281\u001b[0m, in \u001b[0;36mGemmaForSequenceClassification.forward\u001b[0;34m(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, labels, use_cache, output_attentions, output_hidden_states, return_dict)\u001b[0m\n\u001b[1;32m   1273\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124mr\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m   1274\u001b[0m \u001b[38;5;124;03mlabels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):\u001b[39;00m\n\u001b[1;32m   1275\u001b[0m \u001b[38;5;124;03m    Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,\u001b[39;00m\n\u001b[1;32m   1276\u001b[0m \u001b[38;5;124;03m    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If\u001b[39;00m\n\u001b[1;32m   1277\u001b[0m \u001b[38;5;124;03m    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).\u001b[39;00m\n\u001b[1;32m   1278\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m   1279\u001b[0m return_dict \u001b[38;5;241m=\u001b[39m return_dict \u001b[38;5;28;01mif\u001b[39;00m return_dict \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconfig\u001b[38;5;241m.\u001b[39muse_return_dict\n\u001b[0;32m-> 1281\u001b[0m transformer_outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1282\u001b[0m \u001b[43m    \u001b[49m\u001b[43minput_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1283\u001b[0m \u001b[43m    
\u001b[49m\u001b[43mattention_mask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mattention_mask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1284\u001b[0m \u001b[43m    \u001b[49m\u001b[43mposition_ids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mposition_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1285\u001b[0m \u001b[43m    \u001b[49m\u001b[43mpast_key_values\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpast_key_values\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1286\u001b[0m \u001b[43m    \u001b[49m\u001b[43minputs_embeds\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minputs_embeds\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1287\u001b[0m \u001b[43m    \u001b[49m\u001b[43muse_cache\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muse_cache\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1288\u001b[0m \u001b[43m    \u001b[49m\u001b[43moutput_attentions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moutput_attentions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1289\u001b[0m \u001b[43m    \u001b[49m\u001b[43moutput_hidden_states\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moutput_hidden_states\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1290\u001b[0m \u001b[43m    \u001b[49m\u001b[43mreturn_dict\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_dict\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1291\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1292\u001b[0m hidden_states \u001b[38;5;241m=\u001b[39m transformer_outputs[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m   1293\u001b[0m logits \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mscore(hidden_states)\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1511\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1509\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)  \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m   1510\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1511\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1520\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1515\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m   1516\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m   1517\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m   1518\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m   1519\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1520\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1522\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m   1523\u001b[0m     result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/accelerate/hooks.py:166\u001b[0m, in \u001b[0;36madd_hook_to_module.<locals>.new_forward\u001b[0;34m(module, *args, **kwargs)\u001b[0m\n\u001b[1;32m    164\u001b[0m         output \u001b[38;5;241m=\u001b[39m module\u001b[38;5;241m.\u001b[39m_old_forward(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m    165\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 166\u001b[0m     output \u001b[38;5;241m=\u001b[39m \u001b[43mmodule\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_old_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    167\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m module\u001b[38;5;241m.\u001b[39m_hf_hook\u001b[38;5;241m.\u001b[39mpost_forward(module, output)\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/transformers/models/gemma/modeling_gemma.py:902\u001b[0m, in \u001b[0;36mGemmaModel.forward\u001b[0;34m(self, input_ids, attention_mask, position_ids, past_key_values, inputs_embeds, use_cache, output_attentions, output_hidden_states, return_dict, cache_position)\u001b[0m\n\u001b[1;32m    891\u001b[0m     layer_outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_gradient_checkpointing_func(\n\u001b[1;32m    892\u001b[0m         decoder_layer\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__call__\u001b[39m,\n\u001b[1;32m    893\u001b[0m         hidden_states,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    899\u001b[0m         cache_position,\n\u001b[1;32m    900\u001b[0m     )\n\u001b[1;32m    901\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 902\u001b[0m     layer_outputs \u001b[38;5;241m=\u001b[39m \u001b[43mdecoder_layer\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    903\u001b[0m \u001b[43m        \u001b[49m\u001b[43mhidden_states\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    904\u001b[0m \u001b[43m        \u001b[49m\u001b[43mattention_mask\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcausal_mask\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    905\u001b[0m \u001b[43m        \u001b[49m\u001b[43mposition_ids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mposition_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    906\u001b[0m \u001b[43m        \u001b[49m\u001b[43mpast_key_value\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpast_key_values\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    907\u001b[0m \u001b[43m        \u001b[49m\u001b[43moutput_attentions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moutput_attentions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    908\u001b[0m \u001b[43m        
\u001b[49m\u001b[43muse_cache\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muse_cache\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    909\u001b[0m \u001b[43m        \u001b[49m\u001b[43mcache_position\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_position\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    910\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    912\u001b[0m hidden_states \u001b[38;5;241m=\u001b[39m layer_outputs[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m    914\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m use_cache:\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1511\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1509\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)  \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m   1510\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1511\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1520\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1515\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m   1516\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m   1517\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m   1518\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m   1519\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1520\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1522\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m   1523\u001b[0m     result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/accelerate/hooks.py:166\u001b[0m, in \u001b[0;36madd_hook_to_module.<locals>.new_forward\u001b[0;34m(module, *args, **kwargs)\u001b[0m\n\u001b[1;32m    164\u001b[0m         output \u001b[38;5;241m=\u001b[39m module\u001b[38;5;241m.\u001b[39m_old_forward(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m    165\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 166\u001b[0m     output \u001b[38;5;241m=\u001b[39m \u001b[43mmodule\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_old_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    167\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m module\u001b[38;5;241m.\u001b[39m_hf_hook\u001b[38;5;241m.\u001b[39mpost_forward(module, output)\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/transformers/models/gemma/modeling_gemma.py:652\u001b[0m, in \u001b[0;36mGemmaDecoderLayer.forward\u001b[0;34m(self, hidden_states, attention_mask, position_ids, past_key_value, output_attentions, use_cache, cache_position)\u001b[0m\n\u001b[1;32m    650\u001b[0m residual \u001b[38;5;241m=\u001b[39m hidden_states\n\u001b[1;32m    651\u001b[0m hidden_states \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpost_attention_layernorm(hidden_states)\n\u001b[0;32m--> 652\u001b[0m hidden_states \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmlp\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhidden_states\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    653\u001b[0m hidden_states \u001b[38;5;241m=\u001b[39m residual \u001b[38;5;241m+\u001b[39m hidden_states\n\u001b[1;32m    655\u001b[0m outputs \u001b[38;5;241m=\u001b[39m (hidden_states,)\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1511\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1509\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)  \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m   1510\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1511\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1520\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1515\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m   1516\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m   1517\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m   1518\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m   1519\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1520\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1522\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m   1523\u001b[0m     result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/accelerate/hooks.py:166\u001b[0m, in \u001b[0;36madd_hook_to_module.<locals>.new_forward\u001b[0;34m(module, *args, **kwargs)\u001b[0m\n\u001b[1;32m    164\u001b[0m         output \u001b[38;5;241m=\u001b[39m module\u001b[38;5;241m.\u001b[39m_old_forward(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m    165\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 166\u001b[0m     output \u001b[38;5;241m=\u001b[39m \u001b[43mmodule\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_old_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    167\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m module\u001b[38;5;241m.\u001b[39m_hf_hook\u001b[38;5;241m.\u001b[39mpost_forward(module, output)\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/transformers/models/gemma/modeling_gemma.py:185\u001b[0m, in \u001b[0;36mGemmaMLP.forward\u001b[0;34m(self, x)\u001b[0m\n\u001b[1;32m    184\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, x):\n\u001b[0;32m--> 185\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdown_proj(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mact_fn(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgate_proj\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m)\u001b[49m) \u001b[38;5;241m*\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mup_proj(x))\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1511\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1509\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)  \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m   1510\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1511\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1520\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1515\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m   1516\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m   1517\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m   1518\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m   1519\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1520\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1522\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m   1523\u001b[0m     result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/peft/tuners/lora/bnb.py:452\u001b[0m, in \u001b[0;36mLinear4bit.forward\u001b[0;34m(self, x, *args, **kwargs)\u001b[0m\n\u001b[1;32m    450\u001b[0m     result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbase_layer(x, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m    451\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 452\u001b[0m     result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbase_layer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    453\u001b[0m     \u001b[38;5;66;03m# As per Tim Dettmers, for 4bit, we need to defensively clone here.\u001b[39;00m\n\u001b[1;32m    454\u001b[0m     \u001b[38;5;66;03m# The reason is that in some cases, an error can occur that backprop\u001b[39;00m\n\u001b[1;32m    455\u001b[0m     \u001b[38;5;66;03m# does not work on a manipulated view. This issue may be solved with\u001b[39;00m\n\u001b[1;32m    456\u001b[0m     \u001b[38;5;66;03m# newer PyTorch versions but this would need extensive testing to be\u001b[39;00m\n\u001b[1;32m    457\u001b[0m     \u001b[38;5;66;03m# sure.\u001b[39;00m\n\u001b[1;32m    458\u001b[0m     result \u001b[38;5;241m=\u001b[39m result\u001b[38;5;241m.\u001b[39mclone()\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1511\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1509\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)  \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m   1510\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1511\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py:1520\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1515\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m   1516\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m   1517\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m   1518\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m   1519\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1520\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1522\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m   1523\u001b[0m     result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/accelerate/hooks.py:166\u001b[0m, in \u001b[0;36madd_hook_to_module.<locals>.new_forward\u001b[0;34m(module, *args, **kwargs)\u001b[0m\n\u001b[1;32m    164\u001b[0m         output \u001b[38;5;241m=\u001b[39m module\u001b[38;5;241m.\u001b[39m_old_forward(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m    165\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 166\u001b[0m     output \u001b[38;5;241m=\u001b[39m \u001b[43mmodule\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_old_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    167\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m module\u001b[38;5;241m.\u001b[39m_hf_hook\u001b[38;5;241m.\u001b[39mpost_forward(module, output)\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/bitsandbytes/nn/modules.py:468\u001b[0m, in \u001b[0;36mLinear4bit.forward\u001b[0;34m(self, x)\u001b[0m\n\u001b[1;32m    465\u001b[0m     x \u001b[38;5;241m=\u001b[39m x\u001b[38;5;241m.\u001b[39mto(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcompute_dtype)\n\u001b[1;32m    467\u001b[0m bias \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbias \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbias\u001b[38;5;241m.\u001b[39mto(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcompute_dtype)\n\u001b[0;32m--> 468\u001b[0m out \u001b[38;5;241m=\u001b[39m \u001b[43mbnb\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmatmul_4bit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mweight\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mt\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbias\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbias\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mquant_state\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mweight\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mquant_state\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    470\u001b[0m out \u001b[38;5;241m=\u001b[39m out\u001b[38;5;241m.\u001b[39mto(inp_dtype)\n\u001b[1;32m    472\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m out\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/bitsandbytes/autograd/_functions.py:579\u001b[0m, in \u001b[0;36mmatmul_4bit\u001b[0;34m(A, B, quant_state, out, bias)\u001b[0m\n\u001b[1;32m    577\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m out\n\u001b[1;32m    578\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 579\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mMatMul4Bit\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[43mA\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mB\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mout\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbias\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mquant_state\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/autograd/function.py:553\u001b[0m, in \u001b[0;36mFunction.apply\u001b[0;34m(cls, *args, **kwargs)\u001b[0m\n\u001b[1;32m    550\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m torch\u001b[38;5;241m.\u001b[39m_C\u001b[38;5;241m.\u001b[39m_are_functorch_transforms_active():\n\u001b[1;32m    551\u001b[0m     \u001b[38;5;66;03m# See NOTE: [functorch vjp and autograd interaction]\u001b[39;00m\n\u001b[1;32m    552\u001b[0m     args \u001b[38;5;241m=\u001b[39m _functorch\u001b[38;5;241m.\u001b[39mutils\u001b[38;5;241m.\u001b[39munwrap_dead_wrappers(args)\n\u001b[0;32m--> 553\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m  \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m    555\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_setup_ctx_defined:\n\u001b[1;32m    556\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\n\u001b[1;32m    557\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mIn order to use an autograd.Function with functorch transforms \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    558\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m(vmap, grad, jvp, jacrev, ...), it must override the setup_context \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    559\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstaticmethod. 
For more details, please see \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    560\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhttps://pytorch.org/docs/master/notes/extending.func.html\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    561\u001b[0m     )\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/bitsandbytes/autograd/_functions.py:509\u001b[0m, in \u001b[0;36mMatMul4Bit.forward\u001b[0;34m(ctx, A, B, out, bias, quant_state)\u001b[0m\n\u001b[1;32m    505\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m torch\u001b[38;5;241m.\u001b[39mempty(A\u001b[38;5;241m.\u001b[39mshape[:\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m] \u001b[38;5;241m+\u001b[39m B_shape[:\u001b[38;5;241m1\u001b[39m], dtype\u001b[38;5;241m=\u001b[39mA\u001b[38;5;241m.\u001b[39mdtype, device\u001b[38;5;241m=\u001b[39mA\u001b[38;5;241m.\u001b[39mdevice)\n\u001b[1;32m    507\u001b[0m \u001b[38;5;66;03m# 1. Dequantize\u001b[39;00m\n\u001b[1;32m    508\u001b[0m \u001b[38;5;66;03m# 2. MatmulnN\u001b[39;00m\n\u001b[0;32m--> 509\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfunctional\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlinear\u001b[49m\u001b[43m(\u001b[49m\u001b[43mA\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mF\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdequantize_4bit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mB\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mquant_state\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto\u001b[49m\u001b[43m(\u001b[49m\u001b[43mA\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mt\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbias\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    511\u001b[0m \u001b[38;5;66;03m# 3. Save state\u001b[39;00m\n\u001b[1;32m    512\u001b[0m ctx\u001b[38;5;241m.\u001b[39mstate \u001b[38;5;241m=\u001b[39m quant_state\n",
+      "\u001b[0;31mOutOfMemoryError\u001b[0m: CUDA out of memory. Tried to allocate 132.00 MiB. GPU 0 has a total capacity of 23.65 GiB of which 38.69 MiB is free. Process 2450213 has 23.61 GiB memory in use. Of the allocated memory 22.91 GiB is allocated by PyTorch, and 243.12 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)"
+     ]
+    }
+   ],
+   "source": [
+    "from trl import RewardTrainer\n",
+    "\n",
+    "# Gather the objects prepared in earlier cells, then hand them to the trainer.\n",
+    "trainer_kwargs = dict(\n",
+    "    model=model,\n",
+    "    args=training_args,\n",
+    "    tokenizer=tokenizer,\n",
+    "    train_dataset=traindata,\n",
+    "    eval_dataset=val_data,\n",
+    "    peft_config=peft_config,\n",
+    ")\n",
+    "trainer = RewardTrainer(**trainer_kwargs)\n",
+    "\n",
+    "# Run training, then write the resulting model to save_path.\n",
+    "trainer.train()\n",
+    "trainer.save_model(save_path)\n",
+    "print('model saved at', save_path)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c61a7bbc-98cb-4419-9769-0726c94bc831",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "traindata[0]['input_ids_chosen']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "44e7ff33-416a-4d03-a64f-e72b0dc95d7c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Name of the checkpoint to load. The variable must be `base_model` because that is\n",
+    "# the name read on the next line (it was previously assigned to `basemodel`).\n",
+    "base_model = 'mistralai/Mistral-7B-Instruct-v0.3'\n",
+    "# num_labels=1 gives the classification head a single output per sequence.\n",
+    "model = AutoModelForSequenceClassification.from_pretrained(base_model, num_labels=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "62b9f6d7-9c26-4f07-9a83-58ab03f403db",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Forward a single example through the model as a sanity check.\n",
+    "# Hugging Face models expect (batch_size, sequence_length) inputs, so reshape the\n",
+    "# example to carry an explicit batch dimension of 1.\n",
+    "# NOTE(review): assumes traindata[0] holds one tokenized example - confirm against dataloader.\n",
+    "model(input_ids = torch.tensor(traindata[0]['input_ids_chosen']).reshape(1, -1),\n",
+    "      attention_mask = torch.tensor(traindata[0]['attention_mask_chosen']).reshape(1, -1),\n",
+    "      return_dict=True)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "64b0abd3-28b0-462d-98bd-d61446b75935",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "traindata[0]['input_ids_chosen']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cb854c8b-826a-43c1-87b3-a0b4cd3103e6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tokenizer(traindata[0]['chosen_text'], truncation=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "71d84ed8-deca-4a2b-8915-cd504e4f7f88",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "import warnings\n",
+    "from peft import PeftModel, PeftConfig, get_peft_model, LoraConfig\n",
+    "from transformers import AutoModelForSequenceClassification, AutoTokenizer\n",
+    "from dataloader import StoryPairDataset\n",
+    "from trl import RewardTrainer, RewardConfig\n",
+    "import os\n",
+    "#os.environ[\"WANDB_PROJECT\"] = \"<my-amazing-project>\"  # name your W&B project\n",
+    "os.environ[\"WANDB_LOG_MODEL\"] = \"checkpoint\"  # log all model checkpoints\n",
+    "\n",
+    "\n",
+    "# datapath = 'readsy/stories/'\n",
+    "# pairpath = '../../../work/lawecon/Work/penghao/readsy_story_pairs0407.csv'\n",
+    "# model_name = \"../../../work/lawecon/Work/penghao/SFTmodels/gemma-2b_sftm3genre10\"\n",
+    "# base_model = '../../../work/lawecon/Work/penghao/gemma/gemma-2b'\n",
+    "# Derive run settings from the adapter name (model_name must already be defined).\n",
+    "mode = 'm3' if 'm3' in model_name else 'm2'\n",
+    "# Split strategy: 'time' only when the name says so and does not also say 'random'.\n",
+    "if 'random' in model_name:\n",
+    "    split_by = 'random'\n",
+    "elif 'time' in model_name:\n",
+    "    split_by = 'time'\n",
+    "else:\n",
+    "    split_by = 'random'\n",
+    "lease_likes = 10\n",
+    "max_seq_length = 2048*2 # Choose any! We auto support RoPE Scaling internally!\n",
+    "dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+\n",
+    "load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.\n",
+    "margin = False\n",
+    "# The conditional expression binds looser than '+', so it must be parenthesized;\n",
+    "# otherwise margin=False makes the whole path just '_no_margin'. The suffix is chosen\n",
+    "# here once, so no second 'margin' is appended afterwards.\n",
+    "margin_suffix = 'margin' if margin else '_no_margin'\n",
+    "save_path = '../../../work/lawecon/Work/penghao/reward_models/' + model_name + '_rm' + margin_suffix\n",
+    "\n",
+    "# Load the base model with a sequence-classification head, then attach the adapter\n",
+    "# stored at model_name and load the matching tokenizer.\n",
+    "model = AutoModelForSequenceClassification.from_pretrained(\n",
+    "    base_model,\n",
+    "    num_labels=1,  # one score per sequence, as a reward model needs\n",
+    "    load_in_4bit=load_in_4bit,  # honor the toggle defined above instead of a hard-coded True\n",
+    ")\n",
+    "model = PeftModel.from_pretrained(model, model_name)\n",
+    "tokenizer = AutoTokenizer.from_pretrained(base_model)\n",
+    "#model = nn.Sequential(model, nn.Linear(model.config.hidden_size, 1), nn.Sigmoid())\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bcc13555-b49f-4bbc-b4cc-b44a74b2a987",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

README.md ADDED Viewed

	@@ -0,0 +1,59 @@

+---
+base_model: google/gemma-2-9b
+library_name: peft
+license: gemma
+tags:
+- trl
+- reward-trainer
+- generated_from_trainer
+model-index:
+- name: workspace
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/penghaowang14/huggingface/runs/2dg983o2)
+# workspace
+This model is a fine-tuned version of [google/gemma-2-9b](https://huggingface.co/google/gemma-2-9b) on an unspecified dataset (no dataset name was recorded by the Trainer).
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 0.0002
+- train_batch_size: 4
+- eval_batch_size: 4
+- seed: 42
+- gradient_accumulation_steps: 16
+- total_train_batch_size: 64
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: linear
+- num_epochs: 1.0
+### Training results
+### Framework versions
+- PEFT 0.12.0
+- Transformers 4.43.2
+- Pytorch 2.2.0
+- Datasets 2.20.0
+- Tokenizers 0.19.1

RMmodels/gemma-2-9b_sftm3genre36007200/README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+base_model: google/gemma-2-9b
+library_name: peft
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.12.0

RMmodels/gemma-2-9b_sftm3genre36007200/adapter_config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-2-9b",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "o_proj",
+    "q_proj",
+    "k_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

RMmodels/gemma-2-9b_sftm3genre36007200/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a7b47235a73dadbfe04c47a58fcf387d1e01f23b7db05520e13eebbbd51b9f89
+size 286306976

RMmodels/gemma-2-9b_sftm3genre36007200/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "additional_special_tokens": [
+    "<start_of_turn>",
+    "<end_of_turn>"
+  ],
+  "bos_token": {
+    "content": "<bos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<eos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<eos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

RMmodels/gemma-2-9b_sftm3genre36007200/tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8bdd6fa579b0cae69393298845f25133763e90c5814db935ee4496d161aca4da
+size 17518624

RMmodels/gemma-2-9b_sftm3genre36007200/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,1760 @@

+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<eos>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "<bos>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "<mask>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "5": {
+      "content": "<2mass>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "6": {
+      "content": "[@BOS@]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "7": {
+      "content": "<unused0>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "8": {
+      "content": "<unused1>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "9": {
+      "content": "<unused2>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "10": {
+      "content": "<unused3>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "11": {
+      "content": "<unused4>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "12": {
+      "content": "<unused5>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "13": {
+      "content": "<unused6>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "14": {
+      "content": "<unused7>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "15": {
+      "content": "<unused8>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "16": {
+      "content": "<unused9>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "17": {
+      "content": "<unused10>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "18": {
+      "content": "<unused11>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "19": {
+      "content": "<unused12>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "20": {
+      "content": "<unused13>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "21": {
+      "content": "<unused14>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "22": {
+      "content": "<unused15>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "23": {
+      "content": "<unused16>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "24": {
+      "content": "<unused17>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "25": {
+      "content": "<unused18>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "26": {
+      "content": "<unused19>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "27": {
+      "content": "<unused20>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "28": {
+      "content": "<unused21>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "29": {
+      "content": "<unused22>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "30": {
+      "content": "<unused23>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "31": {
+      "content": "<unused24>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "32": {
+      "content": "<unused25>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "33": {
+      "content": "<unused26>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "34": {
+      "content": "<unused27>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "35": {
+      "content": "<unused28>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "36": {
+      "content": "<unused29>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "37": {
+      "content": "<unused30>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "38": {
+      "content": "<unused31>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "39": {
+      "content": "<unused32>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "40": {
+      "content": "<unused33>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "41": {
+      "content": "<unused34>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "42": {
+      "content": "<unused35>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "43": {
+      "content": "<unused36>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "44": {
+      "content": "<unused37>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "45": {
+      "content": "<unused38>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "46": {
+      "content": "<unused39>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "47": {
+      "content": "<unused40>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "48": {
+      "content": "<unused41>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "49": {
+      "content": "<unused42>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50": {
+      "content": "<unused43>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "51": {
+      "content": "<unused44>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "52": {
+      "content": "<unused45>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "53": {
+      "content": "<unused46>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "54": {
+      "content": "<unused47>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "55": {
+      "content": "<unused48>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "56": {
+      "content": "<unused49>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "57": {
+      "content": "<unused50>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "58": {
+      "content": "<unused51>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "59": {
+      "content": "<unused52>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "60": {
+      "content": "<unused53>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "61": {
+      "content": "<unused54>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "62": {
+      "content": "<unused55>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "63": {
+      "content": "<unused56>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "64": {
+      "content": "<unused57>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "65": {
+      "content": "<unused58>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "66": {
+      "content": "<unused59>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "67": {
+      "content": "<unused60>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "68": {
+      "content": "<unused61>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "69": {
+      "content": "<unused62>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "70": {
+      "content": "<unused63>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "71": {
+      "content": "<unused64>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "72": {
+      "content": "<unused65>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "73": {
+      "content": "<unused66>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "74": {
+      "content": "<unused67>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "75": {
+      "content": "<unused68>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "76": {
+      "content": "<unused69>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "77": {
+      "content": "<unused70>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "78": {
+      "content": "<unused71>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "79": {
+      "content": "<unused72>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "80": {
+      "content": "<unused73>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "81": {
+      "content": "<unused74>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "82": {
+      "content": "<unused75>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "83": {
+      "content": "<unused76>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "84": {
+      "content": "<unused77>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "85": {
+      "content": "<unused78>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "86": {
+      "content": "<unused79>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "87": {
+      "content": "<unused80>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "88": {
+      "content": "<unused81>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "89": {
+      "content": "<unused82>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "90": {
+      "content": "<unused83>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "91": {
+      "content": "<unused84>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "92": {
+      "content": "<unused85>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "93": {
+      "content": "<unused86>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "94": {
+      "content": "<unused87>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "95": {
+      "content": "<unused88>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "96": {
+      "content": "<unused89>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "97": {
+      "content": "<unused90>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "98": {
+      "content": "<unused91>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "99": {
+      "content": "<unused92>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "100": {
+      "content": "<unused93>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "101": {
+      "content": "<unused94>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "102": {
+      "content": "<unused95>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "103": {
+      "content": "<unused96>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "104": {
+      "content": "<unused97>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "105": {
+      "content": "<unused98>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "106": {
+      "content": "<start_of_turn>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "107": {
+      "content": "<end_of_turn>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "108": {
+      "content": "\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "109": {
+      "content": "\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "110": {
+      "content": "\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "111": {
+      "content": "\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "112": {
+      "content": "\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "113": {
+      "content": "\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "114": {
+      "content": "\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "115": {
+      "content": "\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "116": {
+      "content": "\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "117": {
+      "content": "\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "118": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "119": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "120": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "121": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "122": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "123": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "124": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "125": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "126": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "127": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "128": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "129": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "130": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "131": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "132": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "133": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "134": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "135": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "136": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "137": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "138": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "139": {
+      "content": "▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "140": {
+      "content": "▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "141": {
+      "content": "▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "142": {
+      "content": "▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "143": {
+      "content": "▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "144": {
+      "content": "▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "145": {
+      "content": "▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "146": {
+      "content": "▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "147": {
+      "content": "▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "148": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "149": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "150": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "152": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "153": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "154": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "155": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "156": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "157": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "158": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "159": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "160": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "161": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "162": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "163": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "164": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "165": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "166": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "167": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "168": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "169": {
+      "content": "<table>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "170": {
+      "content": "<caption>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "171": {
+      "content": "<thead>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "172": {
+      "content": "<tbody>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "173": {
+      "content": "<tfoot>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "174": {
+      "content": "<tr>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "175": {
+      "content": "<th>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "176": {
+      "content": "<td>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "177": {
+      "content": "</table>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "178": {
+      "content": "</caption>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "179": {
+      "content": "</thead>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "180": {
+      "content": "</tbody>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "181": {
+      "content": "</tfoot>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "182": {
+      "content": "</tr>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "183": {
+      "content": "</th>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "184": {
+      "content": "</td>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "185": {
+      "content": "<h1>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "186": {
+      "content": "<h2>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "187": {
+      "content": "<h3>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "188": {
+      "content": "<h4>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "189": {
+      "content": "<h5>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "190": {
+      "content": "<h6>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "191": {
+      "content": "<blockquote>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "192": {
+      "content": "</h1>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "193": {
+      "content": "</h2>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "194": {
+      "content": "</h3>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "195": {
+      "content": "</h4>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "196": {
+      "content": "</h5>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "197": {
+      "content": "</h6>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "198": {
+      "content": "</blockquote>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "199": {
+      "content": "<strong>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "200": {
+      "content": "<em>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "201": {
+      "content": "<b>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "202": {
+      "content": "<i>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "203": {
+      "content": "<u>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "204": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "205": {
+      "content": "<sub>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "206": {
+      "content": "<sup>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "207": {
+      "content": "<code>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "208": {
+      "content": "</strong>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "209": {
+      "content": "</em>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "210": {
+      "content": "</b>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "211": {
+      "content": "</i>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "212": {
+      "content": "</u>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "213": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "214": {
+      "content": "</sub>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "215": {
+      "content": "</sup>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "216": {
+      "content": "</code>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "additional_special_tokens": [
+    "<start_of_turn>",
+    "<end_of_turn>"
+  ],
+  "bos_token": "<bos>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<eos>",
+  "max_length": 4096,
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<eos>",
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
+  "stride": 0,
+  "tokenizer_class": "GemmaTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}

RMmodels/gemma-2-9b_sftm3genre36007200/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e5c83e3b3b73d2647d74b5ae783cd66e92823acac7fcaf02b787aea1f2046579
+size 5176

SFTmodels/gemma-2-9b_sftm2genre100714/README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+base_model: google/gemma-2-9b
+library_name: peft
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.12.0

SFTmodels/gemma-2-9b_sftm2genre100714/adapter_config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-2-9b",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "o_proj",
+    "q_proj",
+    "k_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

SFTmodels/gemma-2-9b_sftm2genre100714/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e090ba720934c8ad68e0f9e96ac348cdf2eadbde7919e22558b302b5747fedd5
+size 286306976

SFTmodels/gemma-2-9b_sftm2genre100714/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "additional_special_tokens": [
+    "<start_of_turn>",
+    "<end_of_turn>"
+  ],
+  "bos_token": {
+    "content": "<bos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<eos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<eos>",
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

SFTmodels/gemma-2-9b_sftm2genre100714/tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8bdd6fa579b0cae69393298845f25133763e90c5814db935ee4496d161aca4da
+size 17518624

SFTmodels/gemma-2-9b_sftm2genre100714/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,1756 @@

+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<eos>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "<bos>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "<mask>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "5": {
+      "content": "<2mass>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "6": {
+      "content": "[@BOS@]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "7": {
+      "content": "<unused0>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "8": {
+      "content": "<unused1>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "9": {
+      "content": "<unused2>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "10": {
+      "content": "<unused3>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "11": {
+      "content": "<unused4>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "12": {
+      "content": "<unused5>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "13": {
+      "content": "<unused6>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "14": {
+      "content": "<unused7>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "15": {
+      "content": "<unused8>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "16": {
+      "content": "<unused9>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "17": {
+      "content": "<unused10>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "18": {
+      "content": "<unused11>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "19": {
+      "content": "<unused12>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "20": {
+      "content": "<unused13>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "21": {
+      "content": "<unused14>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "22": {
+      "content": "<unused15>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "23": {
+      "content": "<unused16>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "24": {
+      "content": "<unused17>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "25": {
+      "content": "<unused18>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "26": {
+      "content": "<unused19>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "27": {
+      "content": "<unused20>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "28": {
+      "content": "<unused21>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "29": {
+      "content": "<unused22>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "30": {
+      "content": "<unused23>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "31": {
+      "content": "<unused24>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "32": {
+      "content": "<unused25>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "33": {
+      "content": "<unused26>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "34": {
+      "content": "<unused27>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "35": {
+      "content": "<unused28>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "36": {
+      "content": "<unused29>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "37": {
+      "content": "<unused30>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "38": {
+      "content": "<unused31>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "39": {
+      "content": "<unused32>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "40": {
+      "content": "<unused33>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "41": {
+      "content": "<unused34>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "42": {
+      "content": "<unused35>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "43": {
+      "content": "<unused36>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "44": {
+      "content": "<unused37>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "45": {
+      "content": "<unused38>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "46": {
+      "content": "<unused39>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "47": {
+      "content": "<unused40>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "48": {
+      "content": "<unused41>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "49": {
+      "content": "<unused42>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50": {
+      "content": "<unused43>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "51": {
+      "content": "<unused44>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "52": {
+      "content": "<unused45>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "53": {
+      "content": "<unused46>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "54": {
+      "content": "<unused47>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "55": {
+      "content": "<unused48>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "56": {
+      "content": "<unused49>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "57": {
+      "content": "<unused50>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "58": {
+      "content": "<unused51>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "59": {
+      "content": "<unused52>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "60": {
+      "content": "<unused53>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "61": {
+      "content": "<unused54>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "62": {
+      "content": "<unused55>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "63": {
+      "content": "<unused56>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "64": {
+      "content": "<unused57>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "65": {
+      "content": "<unused58>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "66": {
+      "content": "<unused59>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "67": {
+      "content": "<unused60>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "68": {
+      "content": "<unused61>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "69": {
+      "content": "<unused62>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "70": {
+      "content": "<unused63>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "71": {
+      "content": "<unused64>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "72": {
+      "content": "<unused65>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "73": {
+      "content": "<unused66>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "74": {
+      "content": "<unused67>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "75": {
+      "content": "<unused68>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "76": {
+      "content": "<unused69>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "77": {
+      "content": "<unused70>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "78": {
+      "content": "<unused71>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "79": {
+      "content": "<unused72>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "80": {
+      "content": "<unused73>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "81": {
+      "content": "<unused74>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "82": {
+      "content": "<unused75>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "83": {
+      "content": "<unused76>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "84": {
+      "content": "<unused77>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "85": {
+      "content": "<unused78>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "86": {
+      "content": "<unused79>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "87": {
+      "content": "<unused80>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "88": {
+      "content": "<unused81>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "89": {
+      "content": "<unused82>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "90": {
+      "content": "<unused83>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "91": {
+      "content": "<unused84>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "92": {
+      "content": "<unused85>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "93": {
+      "content": "<unused86>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "94": {
+      "content": "<unused87>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "95": {
+      "content": "<unused88>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "96": {
+      "content": "<unused89>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "97": {
+      "content": "<unused90>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "98": {
+      "content": "<unused91>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "99": {
+      "content": "<unused92>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "100": {
+      "content": "<unused93>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "101": {
+      "content": "<unused94>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "102": {
+      "content": "<unused95>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "103": {
+      "content": "<unused96>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "104": {
+      "content": "<unused97>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "105": {
+      "content": "<unused98>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "106": {
+      "content": "<start_of_turn>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "107": {
+      "content": "<end_of_turn>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "108": {
+      "content": "\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "109": {
+      "content": "\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "110": {
+      "content": "\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "111": {
+      "content": "\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "112": {
+      "content": "\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "113": {
+      "content": "\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "114": {
+      "content": "\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "115": {
+      "content": "\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "116": {
+      "content": "\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "117": {
+      "content": "\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "118": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "119": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "120": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "121": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "122": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "123": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "124": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "125": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "126": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "127": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "128": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "129": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "130": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "131": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "132": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "133": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "134": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "135": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "136": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "137": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "138": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "139": {
+      "content": "▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "140": {
+      "content": "▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "141": {
+      "content": "▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "142": {
+      "content": "▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "143": {
+      "content": "▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "144": {
+      "content": "▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "145": {
+      "content": "▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "146": {
+      "content": "▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "147": {
+      "content": "▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "148": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "149": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "150": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "152": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "153": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "154": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "155": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "156": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "157": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "158": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "159": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "160": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "161": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "162": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "163": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "164": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "165": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "166": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "167": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "168": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "169": {
+      "content": "<table>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "170": {
+      "content": "<caption>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "171": {
+      "content": "<thead>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "172": {
+      "content": "<tbody>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "173": {
+      "content": "<tfoot>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "174": {
+      "content": "<tr>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "175": {
+      "content": "<th>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "176": {
+      "content": "<td>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "177": {
+      "content": "</table>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "178": {
+      "content": "</caption>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "179": {
+      "content": "</thead>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "180": {
+      "content": "</tbody>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "181": {
+      "content": "</tfoot>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "182": {
+      "content": "</tr>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "183": {
+      "content": "</th>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "184": {
+      "content": "</td>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "185": {
+      "content": "<h1>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "186": {
+      "content": "<h2>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "187": {
+      "content": "<h3>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "188": {
+      "content": "<h4>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "189": {
+      "content": "<h5>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "190": {
+      "content": "<h6>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "191": {
+      "content": "<blockquote>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "192": {
+      "content": "</h1>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "193": {
+      "content": "</h2>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "194": {
+      "content": "</h3>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "195": {
+      "content": "</h4>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "196": {
+      "content": "</h5>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "197": {
+      "content": "</h6>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "198": {
+      "content": "</blockquote>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "199": {
+      "content": "<strong>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "200": {
+      "content": "<em>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "201": {
+      "content": "<b>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "202": {
+      "content": "<i>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "203": {
+      "content": "<u>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "204": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "205": {
+      "content": "<sub>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "206": {
+      "content": "<sup>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "207": {
+      "content": "<code>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "208": {
+      "content": "</strong>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "209": {
+      "content": "</em>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "210": {
+      "content": "</b>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "211": {
+      "content": "</i>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "212": {
+      "content": "</u>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "213": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "214": {
+      "content": "</sub>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "215": {
+      "content": "</sup>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "216": {
+      "content": "</code>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "additional_special_tokens": [
+    "<start_of_turn>",
+    "<end_of_turn>"
+  ],
+  "bos_token": "<bos>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<eos>",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<eos>",
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}

SFTmodels/gemma-2-9b_sftm2genre100714/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7ea81c452a1b9505437c440e939f11dbfaa3ba677ae14359aba807da5b03fbd6
+size 6072

SFTmodels/gemma-2-9b_sftm3genre1800/README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+base_model: google/gemma-2-9b
+library_name: peft
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.12.0

SFTmodels/gemma-2-9b_sftm3genre1800/adapter_config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-2-9b",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "o_proj",
+    "v_proj",
+    "k_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

SFTmodels/gemma-2-9b_sftm3genre1800/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cabf6c3c3b9a919b111a5ab5b28a17c63e78c31037471a11a21a22426bccb98d
+size 286306976

SFTmodels/gemma-2-9b_sftm3genre1800/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "additional_special_tokens": [
+    "<start_of_turn>",
+    "<end_of_turn>"
+  ],
+  "bos_token": {
+    "content": "<bos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<eos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<eos>",
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

SFTmodels/gemma-2-9b_sftm3genre1800/tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8bdd6fa579b0cae69393298845f25133763e90c5814db935ee4496d161aca4da
+size 17518624

SFTmodels/gemma-2-9b_sftm3genre1800/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,1756 @@

+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<eos>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "<bos>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "<mask>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "5": {
+      "content": "<2mass>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "6": {
+      "content": "[@BOS@]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "7": {
+      "content": "<unused0>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "8": {
+      "content": "<unused1>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "9": {
+      "content": "<unused2>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "10": {
+      "content": "<unused3>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "11": {
+      "content": "<unused4>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "12": {
+      "content": "<unused5>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "13": {
+      "content": "<unused6>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "14": {
+      "content": "<unused7>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "15": {
+      "content": "<unused8>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "16": {
+      "content": "<unused9>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "17": {
+      "content": "<unused10>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "18": {
+      "content": "<unused11>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "19": {
+      "content": "<unused12>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "20": {
+      "content": "<unused13>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "21": {
+      "content": "<unused14>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "22": {
+      "content": "<unused15>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "23": {
+      "content": "<unused16>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "24": {
+      "content": "<unused17>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "25": {
+      "content": "<unused18>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "26": {
+      "content": "<unused19>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "27": {
+      "content": "<unused20>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "28": {
+      "content": "<unused21>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "29": {
+      "content": "<unused22>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "30": {
+      "content": "<unused23>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "31": {
+      "content": "<unused24>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "32": {
+      "content": "<unused25>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "33": {
+      "content": "<unused26>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "34": {
+      "content": "<unused27>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "35": {
+      "content": "<unused28>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "36": {
+      "content": "<unused29>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "37": {
+      "content": "<unused30>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "38": {
+      "content": "<unused31>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "39": {
+      "content": "<unused32>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "40": {
+      "content": "<unused33>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "41": {
+      "content": "<unused34>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "42": {
+      "content": "<unused35>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "43": {
+      "content": "<unused36>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "44": {
+      "content": "<unused37>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "45": {
+      "content": "<unused38>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "46": {
+      "content": "<unused39>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "47": {
+      "content": "<unused40>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "48": {
+      "content": "<unused41>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "49": {
+      "content": "<unused42>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50": {
+      "content": "<unused43>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "51": {
+      "content": "<unused44>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "52": {
+      "content": "<unused45>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "53": {
+      "content": "<unused46>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "54": {
+      "content": "<unused47>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "55": {
+      "content": "<unused48>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "56": {
+      "content": "<unused49>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "57": {
+      "content": "<unused50>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "58": {
+      "content": "<unused51>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "59": {
+      "content": "<unused52>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "60": {
+      "content": "<unused53>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "61": {
+      "content": "<unused54>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "62": {
+      "content": "<unused55>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "63": {
+      "content": "<unused56>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "64": {
+      "content": "<unused57>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "65": {
+      "content": "<unused58>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "66": {
+      "content": "<unused59>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "67": {
+      "content": "<unused60>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "68": {
+      "content": "<unused61>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "69": {
+      "content": "<unused62>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "70": {
+      "content": "<unused63>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "71": {
+      "content": "<unused64>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "72": {
+      "content": "<unused65>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "73": {
+      "content": "<unused66>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "74": {
+      "content": "<unused67>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "75": {
+      "content": "<unused68>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "76": {
+      "content": "<unused69>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "77": {
+      "content": "<unused70>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "78": {
+      "content": "<unused71>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "79": {
+      "content": "<unused72>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "80": {
+      "content": "<unused73>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "81": {
+      "content": "<unused74>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "82": {
+      "content": "<unused75>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "83": {
+      "content": "<unused76>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "84": {
+      "content": "<unused77>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "85": {
+      "content": "<unused78>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "86": {
+      "content": "<unused79>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "87": {
+      "content": "<unused80>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "88": {
+      "content": "<unused81>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "89": {
+      "content": "<unused82>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "90": {
+      "content": "<unused83>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "91": {
+      "content": "<unused84>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "92": {
+      "content": "<unused85>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "93": {
+      "content": "<unused86>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "94": {
+      "content": "<unused87>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "95": {
+      "content": "<unused88>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "96": {
+      "content": "<unused89>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "97": {
+      "content": "<unused90>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "98": {
+      "content": "<unused91>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "99": {
+      "content": "<unused92>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "100": {
+      "content": "<unused93>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "101": {
+      "content": "<unused94>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "102": {
+      "content": "<unused95>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "103": {
+      "content": "<unused96>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "104": {
+      "content": "<unused97>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "105": {
+      "content": "<unused98>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "106": {
+      "content": "<start_of_turn>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "107": {
+      "content": "<end_of_turn>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "108": {
+      "content": "\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "109": {
+      "content": "\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "110": {
+      "content": "\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "111": {
+      "content": "\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "112": {
+      "content": "\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "113": {
+      "content": "\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "114": {
+      "content": "\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "115": {
+      "content": "\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "116": {
+      "content": "\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "117": {
+      "content": "\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "118": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "119": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "120": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "121": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "122": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "123": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "124": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "125": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "126": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "127": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "128": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "129": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "130": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "131": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "132": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "133": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "134": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "135": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "136": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "137": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "138": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "139": {
+      "content": "▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "140": {
+      "content": "▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "141": {
+      "content": "▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "142": {
+      "content": "▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "143": {
+      "content": "▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "144": {
+      "content": "▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "145": {
+      "content": "▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "146": {
+      "content": "▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "147": {
+      "content": "▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "148": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "149": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "150": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "152": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "153": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "154": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "155": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "156": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "157": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "158": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "159": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "160": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "161": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "162": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "163": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "164": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "165": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "166": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "167": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "168": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "169": {
+      "content": "<table>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "170": {
+      "content": "<caption>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "171": {
+      "content": "<thead>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "172": {
+      "content": "<tbody>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "173": {
+      "content": "<tfoot>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "174": {
+      "content": "<tr>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "175": {
+      "content": "<th>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "176": {
+      "content": "<td>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "177": {
+      "content": "</table>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "178": {
+      "content": "</caption>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "179": {
+      "content": "</thead>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "180": {
+      "content": "</tbody>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "181": {
+      "content": "</tfoot>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "182": {
+      "content": "</tr>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "183": {
+      "content": "</th>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "184": {
+      "content": "</td>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "185": {
+      "content": "<h1>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "186": {
+      "content": "<h2>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "187": {
+      "content": "<h3>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "188": {
+      "content": "<h4>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "189": {
+      "content": "<h5>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "190": {
+      "content": "<h6>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "191": {
+      "content": "<blockquote>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "192": {
+      "content": "</h1>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "193": {
+      "content": "</h2>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "194": {
+      "content": "</h3>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "195": {
+      "content": "</h4>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "196": {
+      "content": "</h5>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "197": {
+      "content": "</h6>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "198": {
+      "content": "</blockquote>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "199": {
+      "content": "<strong>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "200": {
+      "content": "<em>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "201": {
+      "content": "<b>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "202": {
+      "content": "<i>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "203": {
+      "content": "<u>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "204": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "205": {
+      "content": "<sub>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "206": {
+      "content": "<sup>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "207": {
+      "content": "<code>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "208": {
+      "content": "</strong>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "209": {
+      "content": "</em>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "210": {
+      "content": "</b>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "211": {
+      "content": "</i>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "212": {
+      "content": "</u>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "213": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "214": {
+      "content": "</sub>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "215": {
+      "content": "</sup>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "216": {
+      "content": "</code>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "additional_special_tokens": [
+    "<start_of_turn>",
+    "<end_of_turn>"
+  ],
+  "bos_token": "<bos>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<eos>",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<eos>",
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}

SFTmodels/gemma-2-9b_sftm3genre1800/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5e94b3ae9c3c61dd262197617aca5e013c566231a36267808564a9f5d05a18db
+size 6072

SFTmodels/gemma-2-9b_sftm3genre3600/README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+base_model: google/gemma-2-9b
+library_name: peft
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.12.0

SFTmodels/gemma-2-9b_sftm3genre3600/adapter_config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-2-9b",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "q_proj",
+    "v_proj",
+    "o_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

SFTmodels/gemma-2-9b_sftm3genre3600/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d3c8f57945841496c6431e3184432274606c6e53b6c42a814bc85a1bd09e336e
+size 286306976

SFTmodels/gemma-2-9b_sftm3genre3600/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "additional_special_tokens": [
+    "<start_of_turn>",
+    "<end_of_turn>"
+  ],
+  "bos_token": {
+    "content": "<bos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<eos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<eos>",
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

SFTmodels/gemma-2-9b_sftm3genre3600/tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8bdd6fa579b0cae69393298845f25133763e90c5814db935ee4496d161aca4da
+size 17518624

SFTmodels/gemma-2-9b_sftm3genre3600/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,1756 @@

+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<eos>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "<bos>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "<mask>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "5": {
+      "content": "<2mass>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "6": {
+      "content": "[@BOS@]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "7": {
+      "content": "<unused0>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "8": {
+      "content": "<unused1>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "9": {
+      "content": "<unused2>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "10": {
+      "content": "<unused3>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "11": {
+      "content": "<unused4>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "12": {
+      "content": "<unused5>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "13": {
+      "content": "<unused6>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "14": {
+      "content": "<unused7>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "15": {
+      "content": "<unused8>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "16": {
+      "content": "<unused9>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "17": {
+      "content": "<unused10>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "18": {
+      "content": "<unused11>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "19": {
+      "content": "<unused12>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "20": {
+      "content": "<unused13>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "21": {
+      "content": "<unused14>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "22": {
+      "content": "<unused15>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "23": {
+      "content": "<unused16>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "24": {
+      "content": "<unused17>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "25": {
+      "content": "<unused18>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "26": {
+      "content": "<unused19>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "27": {
+      "content": "<unused20>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "28": {
+      "content": "<unused21>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "29": {
+      "content": "<unused22>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "30": {
+      "content": "<unused23>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "31": {
+      "content": "<unused24>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "32": {
+      "content": "<unused25>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "33": {
+      "content": "<unused26>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "34": {
+      "content": "<unused27>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "35": {
+      "content": "<unused28>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "36": {
+      "content": "<unused29>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "37": {
+      "content": "<unused30>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "38": {
+      "content": "<unused31>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "39": {
+      "content": "<unused32>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "40": {
+      "content": "<unused33>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "41": {
+      "content": "<unused34>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "42": {
+      "content": "<unused35>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "43": {
+      "content": "<unused36>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "44": {
+      "content": "<unused37>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "45": {
+      "content": "<unused38>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "46": {
+      "content": "<unused39>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "47": {
+      "content": "<unused40>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "48": {
+      "content": "<unused41>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "49": {
+      "content": "<unused42>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50": {
+      "content": "<unused43>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "51": {
+      "content": "<unused44>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "52": {
+      "content": "<unused45>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "53": {
+      "content": "<unused46>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "54": {
+      "content": "<unused47>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "55": {
+      "content": "<unused48>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "56": {
+      "content": "<unused49>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "57": {
+      "content": "<unused50>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "58": {
+      "content": "<unused51>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "59": {
+      "content": "<unused52>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "60": {
+      "content": "<unused53>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "61": {
+      "content": "<unused54>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "62": {
+      "content": "<unused55>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "63": {
+      "content": "<unused56>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "64": {
+      "content": "<unused57>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "65": {
+      "content": "<unused58>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "66": {
+      "content": "<unused59>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "67": {
+      "content": "<unused60>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "68": {
+      "content": "<unused61>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "69": {
+      "content": "<unused62>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "70": {
+      "content": "<unused63>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "71": {
+      "content": "<unused64>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "72": {
+      "content": "<unused65>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "73": {
+      "content": "<unused66>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "74": {
+      "content": "<unused67>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "75": {
+      "content": "<unused68>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "76": {
+      "content": "<unused69>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "77": {
+      "content": "<unused70>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "78": {
+      "content": "<unused71>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "79": {
+      "content": "<unused72>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "80": {
+      "content": "<unused73>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "81": {
+      "content": "<unused74>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "82": {
+      "content": "<unused75>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "83": {
+      "content": "<unused76>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "84": {
+      "content": "<unused77>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "85": {
+      "content": "<unused78>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "86": {
+      "content": "<unused79>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "87": {
+      "content": "<unused80>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "88": {
+      "content": "<unused81>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "89": {
+      "content": "<unused82>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "90": {
+      "content": "<unused83>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "91": {
+      "content": "<unused84>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "92": {
+      "content": "<unused85>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "93": {
+      "content": "<unused86>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "94": {
+      "content": "<unused87>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "95": {
+      "content": "<unused88>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "96": {
+      "content": "<unused89>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "97": {
+      "content": "<unused90>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "98": {
+      "content": "<unused91>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "99": {
+      "content": "<unused92>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "100": {
+      "content": "<unused93>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "101": {
+      "content": "<unused94>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "102": {
+      "content": "<unused95>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "103": {
+      "content": "<unused96>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "104": {
+      "content": "<unused97>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "105": {
+      "content": "<unused98>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "106": {
+      "content": "<start_of_turn>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "107": {
+      "content": "<end_of_turn>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "108": {
+      "content": "\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "109": {
+      "content": "\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "110": {
+      "content": "\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "111": {
+      "content": "\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "112": {
+      "content": "\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "113": {
+      "content": "\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "114": {
+      "content": "\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "115": {
+      "content": "\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "116": {
+      "content": "\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "117": {
+      "content": "\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "118": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "119": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "120": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "121": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "122": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "123": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "124": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "125": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "126": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "127": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "128": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "129": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "130": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "131": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "132": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "133": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "134": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "135": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "136": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "137": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "138": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "139": {
+      "content": "▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "140": {
+      "content": "▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "141": {
+      "content": "▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "142": {
+      "content": "▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "143": {
+      "content": "▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "144": {
+      "content": "▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "145": {
+      "content": "▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "146": {
+      "content": "▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "147": {
+      "content": "▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "148": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "149": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "150": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "152": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "153": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "154": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "155": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "156": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "157": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "158": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "159": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "160": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "161": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "162": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "163": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "164": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "165": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "166": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "167": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "168": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "169": {
+      "content": "<table>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "170": {
+      "content": "<caption>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "171": {
+      "content": "<thead>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "172": {
+      "content": "<tbody>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "173": {
+      "content": "<tfoot>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "174": {
+      "content": "<tr>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "175": {
+      "content": "<th>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "176": {
+      "content": "<td>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "177": {
+      "content": "</table>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "178": {
+      "content": "</caption>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "179": {
+      "content": "</thead>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "180": {
+      "content": "</tbody>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "181": {
+      "content": "</tfoot>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "182": {
+      "content": "</tr>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "183": {
+      "content": "</th>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "184": {
+      "content": "</td>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "185": {
+      "content": "<h1>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "186": {
+      "content": "<h2>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "187": {
+      "content": "<h3>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "188": {
+      "content": "<h4>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "189": {
+      "content": "<h5>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "190": {
+      "content": "<h6>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "191": {
+      "content": "<blockquote>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "192": {
+      "content": "</h1>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "193": {
+      "content": "</h2>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "194": {
+      "content": "</h3>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "195": {
+      "content": "</h4>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "196": {
+      "content": "</h5>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "197": {
+      "content": "</h6>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "198": {
+      "content": "</blockquote>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "199": {
+      "content": "<strong>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "200": {
+      "content": "<em>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "201": {
+      "content": "<b>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "202": {
+      "content": "<i>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "203": {
+      "content": "<u>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "204": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "205": {
+      "content": "<sub>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "206": {
+      "content": "<sup>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "207": {
+      "content": "<code>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "208": {
+      "content": "</strong>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "209": {
+      "content": "</em>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "210": {
+      "content": "</b>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "211": {
+      "content": "</i>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "212": {
+      "content": "</u>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "213": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "214": {
+      "content": "</sub>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "215": {
+      "content": "</sup>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "216": {
+      "content": "</code>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "additional_special_tokens": [
+    "<start_of_turn>",
+    "<end_of_turn>"
+  ],
+  "bos_token": "<bos>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<eos>",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<eos>",
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}

SFTmodels/gemma-2-9b_sftm3genre3600/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e5aa5d943ce89de0f4d67ebaf09bcea833c3abcfe1b9b2cb7e89f58e2b206e06
+size 6072

SFTmodels/gemma-2-9b_sftm3genre7200/README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+base_model: google/gemma-2-9b
+library_name: peft
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.12.0

SFTmodels/gemma-2-9b_sftm3genre7200/adapter_config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-2-9b",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj",
+    "k_proj",
+    "o_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

SFTmodels/gemma-2-9b_sftm3genre7200/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4fca781d3d658164e44b22135f8d9156ef2b2c0dab9a9c15379b7b21c06d957c
+size 286306976

SFTmodels/gemma-2-9b_sftm3genre7200/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "additional_special_tokens": [
+    "<start_of_turn>",
+    "<end_of_turn>"
+  ],
+  "bos_token": {
+    "content": "<bos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<eos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<eos>",
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

SFTmodels/gemma-2-9b_sftm3genre7200/tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8bdd6fa579b0cae69393298845f25133763e90c5814db935ee4496d161aca4da
+size 17518624

SFTmodels/gemma-2-9b_sftm3genre7200/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,1756 @@

+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<eos>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "<bos>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "<mask>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "5": {
+      "content": "<2mass>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "6": {
+      "content": "[@BOS@]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "7": {
+      "content": "<unused0>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "8": {
+      "content": "<unused1>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "9": {
+      "content": "<unused2>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "10": {
+      "content": "<unused3>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "11": {
+      "content": "<unused4>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "12": {
+      "content": "<unused5>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "13": {
+      "content": "<unused6>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "14": {
+      "content": "<unused7>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "15": {
+      "content": "<unused8>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "16": {
+      "content": "<unused9>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "17": {
+      "content": "<unused10>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "18": {
+      "content": "<unused11>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "19": {
+      "content": "<unused12>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "20": {
+      "content": "<unused13>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "21": {
+      "content": "<unused14>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "22": {
+      "content": "<unused15>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "23": {
+      "content": "<unused16>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "24": {
+      "content": "<unused17>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "25": {
+      "content": "<unused18>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "26": {
+      "content": "<unused19>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "27": {
+      "content": "<unused20>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "28": {
+      "content": "<unused21>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "29": {
+      "content": "<unused22>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "30": {
+      "content": "<unused23>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "31": {
+      "content": "<unused24>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "32": {
+      "content": "<unused25>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "33": {
+      "content": "<unused26>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "34": {
+      "content": "<unused27>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "35": {
+      "content": "<unused28>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "36": {
+      "content": "<unused29>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "37": {
+      "content": "<unused30>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "38": {
+      "content": "<unused31>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "39": {
+      "content": "<unused32>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "40": {
+      "content": "<unused33>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "41": {
+      "content": "<unused34>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "42": {
+      "content": "<unused35>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "43": {
+      "content": "<unused36>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "44": {
+      "content": "<unused37>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "45": {
+      "content": "<unused38>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "46": {
+      "content": "<unused39>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "47": {
+      "content": "<unused40>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "48": {
+      "content": "<unused41>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "49": {
+      "content": "<unused42>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50": {
+      "content": "<unused43>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "51": {
+      "content": "<unused44>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "52": {
+      "content": "<unused45>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "53": {
+      "content": "<unused46>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "54": {
+      "content": "<unused47>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "55": {
+      "content": "<unused48>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "56": {
+      "content": "<unused49>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "57": {
+      "content": "<unused50>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "58": {
+      "content": "<unused51>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "59": {
+      "content": "<unused52>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "60": {
+      "content": "<unused53>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "61": {
+      "content": "<unused54>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "62": {
+      "content": "<unused55>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "63": {
+      "content": "<unused56>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "64": {
+      "content": "<unused57>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "65": {
+      "content": "<unused58>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "66": {
+      "content": "<unused59>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "67": {
+      "content": "<unused60>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "68": {
+      "content": "<unused61>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "69": {
+      "content": "<unused62>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "70": {
+      "content": "<unused63>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "71": {
+      "content": "<unused64>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "72": {
+      "content": "<unused65>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "73": {
+      "content": "<unused66>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "74": {
+      "content": "<unused67>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "75": {
+      "content": "<unused68>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "76": {
+      "content": "<unused69>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "77": {
+      "content": "<unused70>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "78": {
+      "content": "<unused71>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "79": {
+      "content": "<unused72>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "80": {
+      "content": "<unused73>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "81": {
+      "content": "<unused74>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "82": {
+      "content": "<unused75>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "83": {
+      "content": "<unused76>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "84": {
+      "content": "<unused77>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "85": {
+      "content": "<unused78>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "86": {
+      "content": "<unused79>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "87": {
+      "content": "<unused80>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "88": {
+      "content": "<unused81>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "89": {
+      "content": "<unused82>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "90": {
+      "content": "<unused83>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "91": {
+      "content": "<unused84>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "92": {
+      "content": "<unused85>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "93": {
+      "content": "<unused86>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "94": {
+      "content": "<unused87>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "95": {
+      "content": "<unused88>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "96": {
+      "content": "<unused89>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "97": {
+      "content": "<unused90>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "98": {
+      "content": "<unused91>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "99": {
+      "content": "<unused92>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "100": {
+      "content": "<unused93>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "101": {
+      "content": "<unused94>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "102": {
+      "content": "<unused95>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "103": {
+      "content": "<unused96>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "104": {
+      "content": "<unused97>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "105": {
+      "content": "<unused98>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "106": {
+      "content": "<start_of_turn>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "107": {
+      "content": "<end_of_turn>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "108": {
+      "content": "\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "109": {
+      "content": "\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "110": {
+      "content": "\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "111": {
+      "content": "\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "112": {
+      "content": "\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "113": {
+      "content": "\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "114": {
+      "content": "\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "115": {
+      "content": "\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "116": {
+      "content": "\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "117": {
+      "content": "\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "118": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "119": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "120": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "121": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "122": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "123": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "124": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "125": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "126": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "127": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "128": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "129": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "130": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "131": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "132": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "133": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "134": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "135": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "136": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "137": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "138": {
+      "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "139": {
+      "content": "▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "140": {
+      "content": "▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "141": {
+      "content": "▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "142": {
+      "content": "▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "143": {
+      "content": "▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "144": {
+      "content": "▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "145": {
+      "content": "▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "146": {
+      "content": "▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "147": {
+      "content": "▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "148": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "149": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "150": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "152": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "153": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "154": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "155": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "156": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "157": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "158": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "159": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "160": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "161": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "162": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "163": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "164": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "165": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "166": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "167": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "168": {
+      "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "169": {
+      "content": "<table>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "170": {
+      "content": "<caption>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "171": {
+      "content": "<thead>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "172": {
+      "content": "<tbody>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "173": {
+      "content": "<tfoot>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "174": {
+      "content": "<tr>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "175": {
+      "content": "<th>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "176": {
+      "content": "<td>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "177": {
+      "content": "</table>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "178": {
+      "content": "</caption>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "179": {
+      "content": "</thead>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "180": {
+      "content": "</tbody>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "181": {
+      "content": "</tfoot>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "182": {
+      "content": "</tr>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "183": {
+      "content": "</th>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "184": {
+      "content": "</td>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "185": {
+      "content": "<h1>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "186": {
+      "content": "<h2>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "187": {
+      "content": "<h3>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "188": {
+      "content": "<h4>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "189": {
+      "content": "<h5>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "190": {
+      "content": "<h6>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "191": {
+      "content": "<blockquote>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "192": {
+      "content": "</h1>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "193": {
+      "content": "</h2>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "194": {
+      "content": "</h3>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "195": {
+      "content": "</h4>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "196": {
+      "content": "</h5>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "197": {
+      "content": "</h6>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "198": {
+      "content": "</blockquote>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "199": {
+      "content": "<strong>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "200": {
+      "content": "<em>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "201": {
+      "content": "<b>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "202": {
+      "content": "<i>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "203": {
+      "content": "<u>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "204": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "205": {
+      "content": "<sub>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "206": {
+      "content": "<sup>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "207": {
+      "content": "<code>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "208": {
+      "content": "</strong>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "209": {
+      "content": "</em>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "210": {
+      "content": "</b>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "211": {
+      "content": "</i>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "212": {
+      "content": "</u>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "213": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "214": {
+      "content": "</sub>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "215": {
+      "content": "</sup>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "216": {
+      "content": "</code>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "additional_special_tokens": [
+    "<start_of_turn>",
+    "<end_of_turn>"
+  ],
+  "bos_token": "<bos>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<eos>",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<eos>",
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "GemmaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}

SFTmodels/gemma-2-9b_sftm3genre7200/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cb2e1fbc22ca1a44724292a648859373fcbe934ffac197dfc9c0be01a38da097
+size 6072

Untitled.ipynb ADDED Viewed

	@@ -0,0 +1,744 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "aa178322-0de1-46e3-bdaa-935d448cafda",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#SFT \n",
+    "from unsloth import FastLanguageModel\n",
+    "import torch\n",
+    "max_seq_length = 2048*4 # Choose any! We auto support RoPE Scaling internally!\n",
+    "dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+\n",
+    "load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.\n",
+    "datapath = 'readsy/stories/'\n",
+    "pairpath = 'readsy/pairs/readsy_story_pairs0407.csv'\n",
+    "mode='m3'\n",
+    "split_by = 'genre'\n",
+    "model_name = 'model/gemma/gemma-2b/'\n",
+    "lease_likes = 10\n",
+    "suffix = 'vast'\n",
+    "save_path = 'model/SFTmodels/' +model_name.split('/')[-2] + '_sft' + mode + split_by + str(lease_likes) + suffix\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "280c81eb-4879-41d9-aea4-1dffc2edf836",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model, tokenizer = FastLanguageModel.from_pretrained(\n",
+    "    model_name = model_name, # Choose ANY! eg teknium/OpenHermes-2.5-Mistral-7B\n",
+    "    max_seq_length = max_seq_length,\n",
+    "    dtype = dtype,\n",
+    "    load_in_4bit = load_in_4bit,\n",
+    "    # token = \"hf_...\", # use one if using gated models like meta-llama/Llama-2-7b-hf\n",
+    ")\n",
+    "model = FastLanguageModel.get_peft_model(\n",
+    "    model,\n",
+    "    use_gradient_checkpointing = \"unsloth\",\n",
+    "    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128\n",
+    "    target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n",
+    "                      \"gate_proj\", \"up_proj\", \"down_proj\",],\n",
+    "    lora_alpha = 16,\n",
+    "    lora_dropout = 0, # Supports any, but = 0 is optimized\n",
+    "    bias = \"none\",    # Supports any, but = \"none\" is optimized\n",
+    "    random_state = 3407,\n",
+    "    use_rslora = False,  # We support rank stabilized LoRA\n",
+    "    loftq_config = None, # And LoftQ\n",
+    ")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5989150b-1ad0-4168-8a28-d0379045ddd7",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "the total number of pairs is  29618\n",
+      "the number of effective pairs is  23244\n",
+      "Index(['prompt_id', 'prompt', 'story_id', 'story_title', 'story_author',\n",
+      "       'story_url', 'link', 'genre', 'is_sensitive', 'categories', 'likes',\n",
+      "       'story_text', 'posted_date', 'comments'],\n",
+      "      dtype='object')\n",
+      "{'Horror': 1887, 'Middle School': 1770, 'Character': 1474, 'Thriller and Suspense': 1104, 'Adults': 1090, 'Fluff': 1070, 'Kids': 1063, 'Dialogue': 978, 'Mystery': 920, 'Science Fiction': 849, 'Teens': 824, 'Romance': 806, 'Angst': 802, 'Dramatic': 729, 'Summer': 715, 'Adventure': 697, 'High School': 639, 'Fiction': 585, 'Novel': 510, 'Dark': 505, 'Sad': 481, 'Winter': 432, 'Fantasy': 417, 'Narrative': 403, \"Valentine's Day\": 362, 'Spring': 304, 'Nonfiction': 283, 'Dystopian': 237, 'Short Story': 223, 'Funny': 219, 'Halloween': 208, 'Fall': 206, 'Holiday': 158, 'Historical Fiction': 118, 'Christmas': 89, 'Vampire': 54, 'Thanksgiving': 33}\n",
+      "the genre of test set is  ['Horror']\n",
+      "the percentage of test set is  0.08118224057821373 where total is  23244\n"
+     ]
+    }
+   ],
+   "source": [
+    "from dataloader import StoryPairDataset\n",
+    "SPdataloader = StoryPairDataset(datapath,\n",
+    "                              pairpath,\n",
+    "                              tokenizer,\n",
+    "                              task='sft',\n",
+    "                              used_dataset_size=-1,\n",
+    "                              train_test_split=0.1,\n",
+    "                              split_by=split_by,\n",
+    "                              max_len=4096,\n",
+    "                              mode= mode,\n",
+    "                              max_time_window=3600,\n",
+    "                              least_likes= lease_likes,\n",
+    "                              margin=False)\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ec67afee-86b1-4c91-b3ad-013db3e36bf5",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3d804ea0-5619-49a8-87b7-1e6149589865",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "save_path = 'model/SFTmodels/' +model_name.split('/')[-2] + '_sft' + mode + split_by + str(lease_likes) + suffix\n",
+    "from trl import SFTTrainer\n",
+    "from transformers import TrainingArguments\n",
+    "\n",
+    "trainer = SFTTrainer(\n",
+    "    model = model,\n",
+    "    tokenizer = tokenizer,\n",
+    "    train_dataset = SPdataloader.dataset[\"train\"],\n",
+    "    eval_dataset = SPdataloader.dataset[\"test\"],\n",
+    "    dataset_text_field = \"text\",\n",
+    "    max_seq_length = max_seq_length,\n",
+    "    dataset_num_proc = 1,\n",
+    "    packing = True, # Can make training 5x faster for short sequences.\n",
+    "    args = TrainingArguments(\n",
+    "        per_device_train_batch_size = 1,\n",
+    "        gradient_accumulation_steps = 2,\n",
+    "        warmup_steps = 5,\n",
+    "        num_train_epochs = 1,\n",
+    "        learning_rate = 1e-4,\n",
+    "        fp16 = not torch.cuda.is_bf16_supported(),\n",
+    "        bf16 = torch.cuda.is_bf16_supported(),\n",
+    "        logging_steps = 1,\n",
+    "        optim = \"adamw_8bit\",\n",
+    "        weight_decay = 0.01,\n",
+    "        lr_scheduler_type = \"cosine\",\n",
+    "        seed = 3407,\n",
+    "        output_dir = save_path,\n",
+    "    ),\n",
+    ")\n",
+    "trainer.train()\n",
+    "#save the model AND the tokenizer\n",
+    "trainer.save_model(save_path)\n",
+    "#trainer.save_tokenizer(save_path)\n",
+    "print('model saved at', save_path)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2f85bcda-a568-4d4e-b2e1-4f06972df5d3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#SFT \n",
+    "from unsloth import FastLanguageModel\n",
+    "import torch\n",
+    "max_seq_length = 2048*4 # Choose any! We auto support RoPE Scaling internally!\n",
+    "dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+\n",
+    "load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.\n",
+    "datapath = 'readsy/stories/'\n",
+    "pairpath = 'readsy/pairs/readsy_story_pairs0407.csv'\n",
+    "mode='m3'\n",
+    "split_by = 'time'\n",
+    "model_name = 'model/gemma/gemma-2b/'\n",
+    "lease_likes = 10\n",
+    "suffix = 'vast'\n",
+    "save_path = 'model/SFTmodels/' +model_name.split('/')[-2] + '_sft' + mode + split_by + str(lease_likes) + suffix\n",
+    "model, tokenizer = FastLanguageModel.from_pretrained(\n",
+    "    model_name = model_name, # Choose ANY! eg teknium/OpenHermes-2.5-Mistral-7B\n",
+    "    max_seq_length = max_seq_length,\n",
+    "    dtype = dtype,\n",
+    "    load_in_4bit = load_in_4bit,\n",
+    "    # token = \"hf_...\", # use one if using gated models like meta-llama/Llama-2-7b-hf\n",
+    ")\n",
+    "model = FastLanguageModel.get_peft_model(\n",
+    "    model,\n",
+    "    use_gradient_checkpointing = \"unsloth\",\n",
+    "    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128\n",
+    "    target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n",
+    "                      \"gate_proj\", \"up_proj\", \"down_proj\",],\n",
+    "    lora_alpha = 16,\n",
+    "    lora_dropout = 0, # Supports any, but = 0 is optimized\n",
+    "    bias = \"none\",    # Supports any, but = \"none\" is optimized\n",
+    "    random_state = 3407,\n",
+    "    use_rslora = False,  # We support rank stabilized LoRA\n",
+    "    loftq_config = None, # And LoftQ\n",
+    ")\n",
+    "from dataloader import StoryPairDataset\n",
+    "SPdataloader = StoryPairDataset(datapath,\n",
+    "                              pairpath,\n",
+    "                              tokenizer,\n",
+    "                              task='sft',\n",
+    "                              used_dataset_size=-1,\n",
+    "                              train_test_split=0.1,\n",
+    "                              split_by=split_by,\n",
+    "                              max_len=4096,\n",
+    "                              mode= mode,\n",
+    "                              max_time_window=3600,\n",
+    "                              least_likes= lease_likes,\n",
+    "                              margin=False)\n",
+    "\n",
+    "save_path = 'model/SFTmodels/' +model_name.split('/')[-2] + '_sft' + mode + split_by + str(lease_likes) + suffix\n",
+    "from trl import SFTTrainer\n",
+    "from transformers import TrainingArguments\n",
+    "\n",
+    "trainer = SFTTrainer(\n",
+    "    model = model,\n",
+    "    tokenizer = tokenizer,\n",
+    "    train_dataset = SPdataloader.dataset[\"train\"],\n",
+    "    eval_dataset = SPdataloader.dataset[\"test\"],\n",
+    "    dataset_text_field = \"text\",\n",
+    "    max_seq_length = max_seq_length,\n",
+    "    dataset_num_proc = 1,\n",
+    "    packing = True, # Can make training 5x faster for short sequences.\n",
+    "    args = TrainingArguments(\n",
+    "        per_device_train_batch_size = 1,\n",
+    "        gradient_accumulation_steps = 2,\n",
+    "        warmup_steps = 5,\n",
+    "        num_train_epochs = 1,\n",
+    "        learning_rate = 1e-4,\n",
+    "        fp16 = not torch.cuda.is_bf16_supported(),\n",
+    "        bf16 = torch.cuda.is_bf16_supported(),\n",
+    "        logging_steps = 1,\n",
+    "        optim = \"adamw_8bit\",\n",
+    "        weight_decay = 0.01,\n",
+    "        lr_scheduler_type = \"cosine\",\n",
+    "        seed = 3407,\n",
+    "        output_dir = save_path,\n",
+    "    ),\n",
+    ")\n",
+    "trainer.train()\n",
+    "#save the model AND the tokenizer\n",
+    "trainer.save_model(save_path)\n",
+    "#trainer.save_tokenizer(save_path)\n",
+    "print('model saved at', save_path)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "758d7c03-a9f3-415b-a12e-4f508332cb22",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#SFT \n",
+    "from unsloth import FastLanguageModel\n",
+    "import torch\n",
+    "# Run configuration for this SFT cell; these values are passed to the model loader,\n",
+    "# StoryPairDataset and SFTTrainer further down in the cell.\n",
+    "max_seq_length = 2048*4 # Choose any! We auto support RoPE Scaling internally!\n",
+    "dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+\n",
+    "load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.\n",
+    "datapath = 'readsy/stories/'\n",
+    "pairpath = 'readsy/pairs/readsy_story_pairs0407.csv'\n",
+    "mode='m3'\n",
+    "split_by = 'random'\n",
+    "model_name = 'model/gemma/gemma-2b/'\n",
+    "lease_likes = 10\n",
+    "suffix = 'vast'\n",
+    "# model_name ends with '/', so split('/')[-1] would be '' and drop the model name from the path;\n",
+    "# strip the trailing slash first so the last path component ('gemma-2b') is used.\n",
+    "save_path = 'model/SFTmodels/' + model_name.rstrip('/').split('/')[-1] + '_sft' + mode + split_by + str(lease_likes) + suffix\n",
+    "model, tokenizer = FastLanguageModel.from_pretrained(\n",
+    "    model_name = model_name, # Choose ANY! eg teknium/OpenHermes-2.5-Mistral-7B\n",
+    "    max_seq_length = max_seq_length,\n",
+    "    dtype = dtype,\n",
+    "    load_in_4bit = load_in_4bit,\n",
+    "    # token = \"hf_...\", # use one if using gated models like meta-llama/Llama-2-7b-hf\n",
+    ")\n",
+    "model = FastLanguageModel.get_peft_model(\n",
+    "    model,\n",
+    "    use_gradient_checkpointing = \"unsloth\",\n",
+    "    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128\n",
+    "    target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n",
+    "                      \"gate_proj\", \"up_proj\", \"down_proj\",],\n",
+    "    lora_alpha = 16,\n",
+    "    lora_dropout = 0, # Supports any, but = 0 is optimized\n",
+    "    bias = \"none\",    # Supports any, but = \"none\" is optimized\n",
+    "    random_state = 3407,\n",
+    "    use_rslora = False,  # We support rank stabilized LoRA\n",
+    "    loftq_config = None, # And LoftQ\n",
+    ")\n",
+    "from dataloader import StoryPairDataset\n",
+    "SPdataloader = StoryPairDataset(datapath,\n",
+    "                              pairpath,\n",
+    "                              tokenizer,\n",
+    "                              task='sft',\n",
+    "                              used_dataset_size=-1,\n",
+    "                              train_test_split=0.1,\n",
+    "                              split_by=split_by,\n",
+    "                              max_len=4096,\n",
+    "                              mode= mode,\n",
+    "                              max_time_window=3600,\n",
+    "                              least_likes= lease_likes,\n",
+    "                              margin=False)\n",
+    "\n",
+    "save_path = 'model/SFTmodels/' +model_name.split('/')[-2] + '_sft' + mode + split_by + str(lease_likes) + suffix\n",
+    "from trl import SFTTrainer\n",
+    "from transformers import TrainingArguments\n",
+    "\n",
+    "trainer = SFTTrainer(\n",
+    "    model = model,\n",
+    "    tokenizer = tokenizer,\n",
+    "    train_dataset = SPdataloader.dataset[\"train\"],\n",
+    "    eval_dataset = SPdataloader.dataset[\"test\"],\n",
+    "    dataset_text_field = \"text\",\n",
+    "    max_seq_length = max_seq_length,\n",
+    "    dataset_num_proc = 1,\n",
+    "    packing = True, # Can make training 5x faster for short sequences.\n",
+    "    args = TrainingArguments(\n",
+    "        per_device_train_batch_size = 1,\n",
+    "        gradient_accumulation_steps = 2,\n",
+    "        warmup_steps = 5,\n",
+    "        num_train_epochs = 1,\n",
+    "        learning_rate = 1e-4,\n",
+    "        fp16 = not torch.cuda.is_bf16_supported(),\n",
+    "        bf16 = torch.cuda.is_bf16_supported(),\n",
+    "        logging_steps = 1,\n",
+    "        optim = \"adamw_8bit\",\n",
+    "        weight_decay = 0.01,\n",
+    "        lr_scheduler_type = \"cosine\",\n",
+    "        seed = 3407,\n",
+    "        output_dir = save_path,\n",
+    "    ),\n",
+    ")\n",
+    "trainer.train()\n",
+    "#save the model AND the tokenizer\n",
+    "trainer.save_model(save_path)\n",
+    "#trainer.save_tokenizer(save_path)\n",
+    "print('model saved at', save_path)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ebc6ed22-6469-4385-a44c-700084cc43cc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#SFT \n",
+    "from unsloth import FastLanguageModel\n",
+    "import torch\n",
+    "# Run configuration for this SFT cell; these values are passed to the model loader,\n",
+    "# StoryPairDataset and SFTTrainer further down in the cell.\n",
+    "max_seq_length = 2048*4 # Choose any! We auto support RoPE Scaling internally!\n",
+    "dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+\n",
+    "load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.\n",
+    "datapath = 'readsy/stories/'\n",
+    "pairpath = 'readsy/pairs/readsy_story_pairs0407.csv'\n",
+    "mode='m2'\n",
+    "split_by = 'time'\n",
+    "model_name = 'model/gemma/gemma-2b/'\n",
+    "lease_likes = 10\n",
+    "suffix = 'vast'\n",
+    "# model_name ends with '/', so split('/')[-1] would be '' and drop the model name from the path;\n",
+    "# strip the trailing slash first so the last path component ('gemma-2b') is used.\n",
+    "save_path = 'model/SFTmodels/' + model_name.rstrip('/').split('/')[-1] + '_sft' + mode + split_by + str(lease_likes) + suffix\n",
+    "model, tokenizer = FastLanguageModel.from_pretrained(\n",
+    "    model_name = model_name, # Choose ANY! eg teknium/OpenHermes-2.5-Mistral-7B\n",
+    "    max_seq_length = max_seq_length,\n",
+    "    dtype = dtype,\n",
+    "    load_in_4bit = load_in_4bit,\n",
+    "    # token = \"hf_...\", # use one if using gated models like meta-llama/Llama-2-7b-hf\n",
+    ")\n",
+    "model = FastLanguageModel.get_peft_model(\n",
+    "    model,\n",
+    "    use_gradient_checkpointing = \"unsloth\",\n",
+    "    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128\n",
+    "    target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n",
+    "                      \"gate_proj\", \"up_proj\", \"down_proj\",],\n",
+    "    lora_alpha = 16,\n",
+    "    lora_dropout = 0, # Supports any, but = 0 is optimized\n",
+    "    bias = \"none\",    # Supports any, but = \"none\" is optimized\n",
+    "    random_state = 3407,\n",
+    "    use_rslora = False,  # We support rank stabilized LoRA\n",
+    "    loftq_config = None, # And LoftQ\n",
+    ")\n",
+    "from dataloader import StoryPairDataset\n",
+    "SPdataloader = StoryPairDataset(datapath,\n",
+    "                              pairpath,\n",
+    "                              tokenizer,\n",
+    "                              task='sft',\n",
+    "                              used_dataset_size=-1,\n",
+    "                              train_test_split=0.1,\n",
+    "                              split_by=split_by,\n",
+    "                              max_len=4096,\n",
+    "                              mode= mode,\n",
+    "                              max_time_window=3600,\n",
+    "                              least_likes= lease_likes,\n",
+    "                              margin=False)\n",
+    "\n",
+    "save_path = 'model/SFTmodels/' +model_name.split('/')[-2] + '_sft' + mode + split_by + str(lease_likes) + suffix\n",
+    "from trl import SFTTrainer\n",
+    "from transformers import TrainingArguments\n",
+    "\n",
+    "trainer = SFTTrainer(\n",
+    "    model = model,\n",
+    "    tokenizer = tokenizer,\n",
+    "    train_dataset = SPdataloader.dataset[\"train\"],\n",
+    "    eval_dataset = SPdataloader.dataset[\"test\"],\n",
+    "    dataset_text_field = \"text\",\n",
+    "    max_seq_length = max_seq_length,\n",
+    "    dataset_num_proc = 1,\n",
+    "    packing = True, # Can make training 5x faster for short sequences.\n",
+    "    args = TrainingArguments(\n",
+    "        per_device_train_batch_size = 1,\n",
+    "        gradient_accumulation_steps = 2,\n",
+    "        warmup_steps = 5,\n",
+    "        num_train_epochs = 1,\n",
+    "        learning_rate = 1e-4,\n",
+    "        fp16 = not torch.cuda.is_bf16_supported(),\n",
+    "        bf16 = torch.cuda.is_bf16_supported(),\n",
+    "        logging_steps = 1,\n",
+    "        optim = \"adamw_8bit\",\n",
+    "        weight_decay = 0.01,\n",
+    "        lr_scheduler_type = \"cosine\",\n",
+    "        seed = 3407,\n",
+    "        output_dir = save_path,\n",
+    "    ),\n",
+    ")\n",
+    "trainer.train()\n",
+    "#save the model AND the tokenizer\n",
+    "trainer.save_model(save_path)\n",
+    "#trainer.save_tokenizer(save_path)\n",
+    "print('model saved at', save_path)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "82c79072-5128-4bc2-844c-ae001616a402",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#SFT \n",
+    "from unsloth import FastLanguageModel\n",
+    "import torch\n",
+    "# Run configuration for this SFT cell; these values are passed to the model loader,\n",
+    "# StoryPairDataset and SFTTrainer further down in the cell.\n",
+    "max_seq_length = 2048*4 # Choose any! We auto support RoPE Scaling internally!\n",
+    "dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+\n",
+    "load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.\n",
+    "datapath = 'readsy/stories/'\n",
+    "pairpath = 'readsy/pairs/readsy_story_pairs0407.csv'\n",
+    "mode='m2'\n",
+    "split_by = 'random'\n",
+    "model_name = 'model/gemma/gemma-2b/'\n",
+    "lease_likes = 10\n",
+    "suffix = 'vast'\n",
+    "# model_name ends with '/', so split('/')[-1] would be '' and drop the model name from the path;\n",
+    "# strip the trailing slash first so the last path component ('gemma-2b') is used.\n",
+    "save_path = 'model/SFTmodels/' + model_name.rstrip('/').split('/')[-1] + '_sft' + mode + split_by + str(lease_likes) + suffix\n",
+    "model, tokenizer = FastLanguageModel.from_pretrained(\n",
+    "    model_name = model_name, # Choose ANY! eg teknium/OpenHermes-2.5-Mistral-7B\n",
+    "    max_seq_length = max_seq_length,\n",
+    "    dtype = dtype,\n",
+    "    load_in_4bit = load_in_4bit,\n",
+    "    # token = \"hf_...\", # use one if using gated models like meta-llama/Llama-2-7b-hf\n",
+    ")\n",
+    "model = FastLanguageModel.get_peft_model(\n",
+    "    model,\n",
+    "    use_gradient_checkpointing = \"unsloth\",\n",
+    "    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128\n",
+    "    target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n",
+    "                      \"gate_proj\", \"up_proj\", \"down_proj\",],\n",
+    "    lora_alpha = 16,\n",
+    "    lora_dropout = 0, # Supports any, but = 0 is optimized\n",
+    "    bias = \"none\",    # Supports any, but = \"none\" is optimized\n",
+    "    random_state = 3407,\n",
+    "    use_rslora = False,  # We support rank stabilized LoRA\n",
+    "    loftq_config = None, # And LoftQ\n",
+    ")\n",
+    "from dataloader import StoryPairDataset\n",
+    "SPdataloader = StoryPairDataset(datapath,\n",
+    "                              pairpath,\n",
+    "                              tokenizer,\n",
+    "                              task='sft',\n",
+    "                              used_dataset_size=-1,\n",
+    "                              train_test_split=0.1,\n",
+    "                              split_by=split_by,\n",
+    "                              max_len=4096,\n",
+    "                              mode= mode,\n",
+    "                              max_time_window=3600,\n",
+    "                              least_likes= lease_likes,\n",
+    "                              margin=False)\n",
+    "\n",
+    "save_path = 'model/SFTmodels/' +model_name.split('/')[-2] + '_sft' + mode + split_by + str(lease_likes) + suffix\n",
+    "from trl import SFTTrainer\n",
+    "from transformers import TrainingArguments\n",
+    "\n",
+    "trainer = SFTTrainer(\n",
+    "    model = model,\n",
+    "    tokenizer = tokenizer,\n",
+    "    train_dataset = SPdataloader.dataset[\"train\"],\n",
+    "    eval_dataset = SPdataloader.dataset[\"test\"],\n",
+    "    dataset_text_field = \"text\",\n",
+    "    max_seq_length = max_seq_length,\n",
+    "    dataset_num_proc = 1,\n",
+    "    packing = True, # Can make training 5x faster for short sequences.\n",
+    "    args = TrainingArguments(\n",
+    "        per_device_train_batch_size = 1,\n",
+    "        gradient_accumulation_steps = 2,\n",
+    "        warmup_steps = 5,\n",
+    "        num_train_epochs = 1,\n",
+    "        learning_rate = 1e-4,\n",
+    "        fp16 = not torch.cuda.is_bf16_supported(),\n",
+    "        bf16 = torch.cuda.is_bf16_supported(),\n",
+    "        logging_steps = 1,\n",
+    "        optim = \"adamw_8bit\",\n",
+    "        weight_decay = 0.01,\n",
+    "        lr_scheduler_type = \"cosine\",\n",
+    "        seed = 3407,\n",
+    "        output_dir = save_path,\n",
+    "    ),\n",
+    ")\n",
+    "trainer.train()\n",
+    "#save the model AND the tokenizer\n",
+    "trainer.save_model(save_path)\n",
+    "#trainer.save_tokenizer(save_path)\n",
+    "print('model saved at', save_path)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "69f5a65f-8ddc-42a2-873f-432cb363386d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#SFT \n",
+    "from unsloth import FastLanguageModel\n",
+    "import torch\n",
+    "# Run configuration for this SFT cell; these values are passed to the model loader,\n",
+    "# StoryPairDataset and SFTTrainer further down in the cell.\n",
+    "max_seq_length = 2048*4 # Choose any! We auto support RoPE Scaling internally!\n",
+    "dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+\n",
+    "load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.\n",
+    "datapath = 'readsy/stories/'\n",
+    "pairpath = 'readsy/pairs/readsy_story_pairs0407.csv'\n",
+    "mode='m2'\n",
+    "split_by = 'genre'\n",
+    "model_name = 'model/gemma/gemma-2b/'\n",
+    "lease_likes = 10\n",
+    "suffix = 'vast'\n",
+    "# model_name ends with '/', so split('/')[-1] would be '' and drop the model name from the path;\n",
+    "# strip the trailing slash first so the last path component ('gemma-2b') is used.\n",
+    "save_path = 'model/SFTmodels/' + model_name.rstrip('/').split('/')[-1] + '_sft' + mode + split_by + str(lease_likes) + suffix\n",
+    "model, tokenizer = FastLanguageModel.from_pretrained(\n",
+    "    model_name = model_name, # Choose ANY! eg teknium/OpenHermes-2.5-Mistral-7B\n",
+    "    max_seq_length = max_seq_length,\n",
+    "    dtype = dtype,\n",
+    "    load_in_4bit = load_in_4bit,\n",
+    "    # token = \"hf_...\", # use one if using gated models like meta-llama/Llama-2-7b-hf\n",
+    ")\n",
+    "model = FastLanguageModel.get_peft_model(\n",
+    "    model,\n",
+    "    use_gradient_checkpointing = \"unsloth\",\n",
+    "    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128\n",
+    "    target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n",
+    "                      \"gate_proj\", \"up_proj\", \"down_proj\",],\n",
+    "    lora_alpha = 16,\n",
+    "    lora_dropout = 0, # Supports any, but = 0 is optimized\n",
+    "    bias = \"none\",    # Supports any, but = \"none\" is optimized\n",
+    "    random_state = 3407,\n",
+    "    use_rslora = False,  # We support rank stabilized LoRA\n",
+    "    loftq_config = None, # And LoftQ\n",
+    ")\n",
+    "from dataloader import StoryPairDataset\n",
+    "SPdataloader = StoryPairDataset(datapath,\n",
+    "                              pairpath,\n",
+    "                              tokenizer,\n",
+    "                              task='sft',\n",
+    "                              used_dataset_size=-1,\n",
+    "                              train_test_split=0.1,\n",
+    "                              split_by=split_by,\n",
+    "                              max_len=4096,\n",
+    "                              mode= mode,\n",
+    "                              max_time_window=3600,\n",
+    "                              least_likes= lease_likes,\n",
+    "                              margin=False)\n",
+    "\n",
+    "save_path = 'model/SFTmodels/' +model_name.split('/')[-2] + '_sft' + mode + split_by + str(lease_likes) + suffix\n",
+    "from trl import SFTTrainer\n",
+    "from transformers import TrainingArguments\n",
+    "\n",
+    "trainer = SFTTrainer(\n",
+    "    model = model,\n",
+    "    tokenizer = tokenizer,\n",
+    "    train_dataset = SPdataloader.dataset[\"train\"],\n",
+    "    eval_dataset = SPdataloader.dataset[\"test\"],\n",
+    "    dataset_text_field = \"text\",\n",
+    "    max_seq_length = max_seq_length,\n",
+    "    dataset_num_proc = 1,\n",
+    "    packing = True, # Can make training 5x faster for short sequences.\n",
+    "    args = TrainingArguments(\n",
+    "        per_device_train_batch_size = 1,\n",
+    "        gradient_accumulation_steps = 2,\n",
+    "        warmup_steps = 5,\n",
+    "        num_train_epochs = 1,\n",
+    "        learning_rate = 1e-4,\n",
+    "        fp16 = not torch.cuda.is_bf16_supported(),\n",
+    "        bf16 = torch.cuda.is_bf16_supported(),\n",
+    "        logging_steps = 1,\n",
+    "        optim = \"adamw_8bit\",\n",
+    "        weight_decay = 0.01,\n",
+    "        lr_scheduler_type = \"cosine\",\n",
+    "        seed = 3407,\n",
+    "        output_dir = save_path,\n",
+    "    ),\n",
+    ")\n",
+    "trainer.train()\n",
+    "#save the model AND the tokenizer\n",
+    "trainer.save_model(save_path)\n",
+    "#trainer.save_tokenizer(save_path)\n",
+    "print('model saved at', save_path)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "357116f9-e206-4a77-acf6-43835d2b83bf",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Prompt: Write a story about discovering a lost manuscript. It can be from a famous (or infamous) author, or an unknown one.\n",
+      "inputs: <bos><|im_start|>user\n",
+      "Write a story about discovering a lost manuscript. It can be from a famous (or infamous) author, or an unknown one.<|im_end|>\n",
+      "<|im_start|>assistant\n",
+      "\n",
+      "inputs encoded: tensor([[     2,      2, 235322, 235371,    571, 235298,   2997,  73786,   1645,\n",
+      "            108,   5559,    476,   3904,   1105,  59551,    476,   5501,  28086,\n",
+      "         235265,   1165,    798,    614,    774,    476,  10964,    591,    483,\n",
+      "          76100, 235275,   3426, 235269,    689,    671,  12417,    974,  35606,\n",
+      "         235371,    571, 235298,    615,  73786,    108, 235322, 235371,    571,\n",
+      "         235298,   2997,  73786, 105776,    108]])\n"
+     ]
+    },
+    {
+     "ename": "KeyboardInterrupt",
+     "evalue": "",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[10], line 23\u001b[0m\n\u001b[1;32m     21\u001b[0m prompt \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mWrite a story about discovering a lost manuscript. It can be from a famous (or infamous) author, or an unknown one.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m     22\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPrompt:\u001b[39m\u001b[38;5;124m\"\u001b[39m, prompt)\n\u001b[0;32m---> 23\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[43mgenerate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtokenizer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mprompt\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     24\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mwritten by the model:\u001b[39m\u001b[38;5;124m'\u001b[39m, model_path)  \n\u001b[1;32m     25\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mGenerated story:\u001b[39m\u001b[38;5;124m\"\u001b[39m, outputs)\n",
+      "Cell \u001b[0;32mIn[10], line 14\u001b[0m, in \u001b[0;36mgenerate\u001b[0;34m(model, tokenizer, prompt, max_length)\u001b[0m\n\u001b[1;32m     11\u001b[0m \u001b[38;5;66;03m# Move inputs to GPU\u001b[39;00m\n\u001b[1;32m     12\u001b[0m inputs \u001b[38;5;241m=\u001b[39m inputs\u001b[38;5;241m.\u001b[39mto(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcuda\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m---> 14\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate\u001b[49m\u001b[43m(\u001b[49m\u001b[43minput_ids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmax_new_tokens\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mmax_length\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmin_new_tokens\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m500\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m     15\u001b[0m \u001b[38;5;66;03m#decode the outputs\u001b[39;00m\n\u001b[1;32m     16\u001b[0m outputs \u001b[38;5;241m=\u001b[39m tokenizer\u001b[38;5;241m.\u001b[39mdecode(outputs[\u001b[38;5;241m0\u001b[39m], skip_special_tokens\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/peft/peft_model.py:1491\u001b[0m, in \u001b[0;36mPeftModelForCausalLM.generate\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1489\u001b[0m     \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_enable_peft_forward_hooks(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m   1490\u001b[0m         kwargs \u001b[38;5;241m=\u001b[39m {k: v \u001b[38;5;28;01mfor\u001b[39;00m k, v \u001b[38;5;129;01min\u001b[39;00m kwargs\u001b[38;5;241m.\u001b[39mitems() \u001b[38;5;28;01mif\u001b[39;00m k \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mspecial_peft_forward_args}\n\u001b[0;32m-> 1491\u001b[0m         outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbase_model\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1492\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m   1493\u001b[0m     outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbase_model\u001b[38;5;241m.\u001b[39mgenerate(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/torch/utils/_contextlib.py:115\u001b[0m, in \u001b[0;36mcontext_decorator.<locals>.decorate_context\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    112\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m    113\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdecorate_context\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m    114\u001b[0m     \u001b[38;5;28;01mwith\u001b[39;00m ctx_factory():\n\u001b[0;32m--> 115\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/transformers/generation/utils.py:1758\u001b[0m, in \u001b[0;36mGenerationMixin.generate\u001b[0;34m(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, negative_prompt_ids, negative_prompt_attention_mask, **kwargs)\u001b[0m\n\u001b[1;32m   1750\u001b[0m     input_ids, model_kwargs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_expand_inputs_for_generation(\n\u001b[1;32m   1751\u001b[0m         input_ids\u001b[38;5;241m=\u001b[39minput_ids,\n\u001b[1;32m   1752\u001b[0m         expand_size\u001b[38;5;241m=\u001b[39mgeneration_config\u001b[38;5;241m.\u001b[39mnum_return_sequences,\n\u001b[1;32m   1753\u001b[0m         is_encoder_decoder\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconfig\u001b[38;5;241m.\u001b[39mis_encoder_decoder,\n\u001b[1;32m   1754\u001b[0m         \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mmodel_kwargs,\n\u001b[1;32m   1755\u001b[0m     )\n\u001b[1;32m   1757\u001b[0m     \u001b[38;5;66;03m# 13. 
run sample (it degenerates to greedy search when `generation_config.do_sample=False`)\u001b[39;00m\n\u001b[0;32m-> 1758\u001b[0m     result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sample\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1759\u001b[0m \u001b[43m        \u001b[49m\u001b[43minput_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1760\u001b[0m \u001b[43m        \u001b[49m\u001b[43mlogits_processor\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mprepared_logits_processor\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1761\u001b[0m \u001b[43m        \u001b[49m\u001b[43mlogits_warper\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mprepared_logits_warper\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1762\u001b[0m \u001b[43m        \u001b[49m\u001b[43mstopping_criteria\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mprepared_stopping_criteria\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1763\u001b[0m \u001b[43m        \u001b[49m\u001b[43mgeneration_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgeneration_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1764\u001b[0m \u001b[43m        \u001b[49m\u001b[43msynced_gpus\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msynced_gpus\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1765\u001b[0m \u001b[43m        \u001b[49m\u001b[43mstreamer\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstreamer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1766\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mmodel_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1767\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1769\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m generation_mode \u001b[38;5;129;01min\u001b[39;00m (GenerationMode\u001b[38;5;241m.\u001b[39mBEAM_SAMPLE, GenerationMode\u001b[38;5;241m.\u001b[39mBEAM_SEARCH):\n\u001b[1;32m   
1770\u001b[0m     \u001b[38;5;66;03m# 11. prepare logits warper\u001b[39;00m\n\u001b[1;32m   1771\u001b[0m     prepared_logits_warper \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m   1772\u001b[0m         \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_logits_warper(generation_config) \u001b[38;5;28;01mif\u001b[39;00m generation_config\u001b[38;5;241m.\u001b[39mdo_sample \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m   1773\u001b[0m     )\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/transformers/generation/utils.py:2392\u001b[0m, in \u001b[0;36mGenerationMixin._sample\u001b[0;34m(self, input_ids, logits_processor, stopping_criteria, generation_config, synced_gpus, streamer, logits_warper, **model_kwargs)\u001b[0m\n\u001b[1;32m   2389\u001b[0m unfinished_sequences \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mones(batch_size, dtype\u001b[38;5;241m=\u001b[39mtorch\u001b[38;5;241m.\u001b[39mlong, device\u001b[38;5;241m=\u001b[39minput_ids\u001b[38;5;241m.\u001b[39mdevice)\n\u001b[1;32m   2390\u001b[0m model_kwargs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_initial_cache_position(input_ids, model_kwargs)\n\u001b[0;32m-> 2392\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_has_unfinished_sequences\u001b[49m\u001b[43m(\u001b[49m\u001b[43mthis_peer_finished\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msynced_gpus\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdevice\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minput_ids\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdevice\u001b[49m\u001b[43m)\u001b[49m:\n\u001b[1;32m   2393\u001b[0m     \u001b[38;5;66;03m# prepare model inputs\u001b[39;00m\n\u001b[1;32m   2394\u001b[0m     model_inputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mprepare_inputs_for_generation(input_ids, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mmodel_kwargs)\n\u001b[1;32m   2396\u001b[0m     \u001b[38;5;66;03m# forward pass to get next token\u001b[39;00m\n",
+      "File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/transformers/generation/utils.py:1922\u001b[0m, in \u001b[0;36mGenerationMixin._has_unfinished_sequences\u001b[0;34m(self, this_peer_finished, synced_gpus, device)\u001b[0m\n\u001b[1;32m   1920\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m this_peer_finished_flag\u001b[38;5;241m.\u001b[39mitem() \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0.0\u001b[39m:\n\u001b[1;32m   1921\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[0;32m-> 1922\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m this_peer_finished:\n\u001b[1;32m   1923\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m   1924\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m\n",
+      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
+     ]
+    }
+   ],
+   "source": [
+    "\n",
+    "\n",
+    "\n",
+    "def generate(model, tokenizer, prompt, max_length=1024*4):\n",
+    "    \"\"\"Generate a completion for a single user prompt and return the decoded text.\n",
+    "\n",
+    "    Args:\n",
+    "        model: object exposing ``generate(input_ids=..., max_new_tokens=..., min_new_tokens=...)``.\n",
+    "        tokenizer: tokenizer providing ``apply_chat_template``, ``bos_token``, ``encode`` and ``decode``.\n",
+    "        prompt: text placed in the single ``user`` turn of the chat.\n",
+    "        max_length: upper bound on newly generated tokens (passed as ``max_new_tokens``).\n",
+    "\n",
+    "    Returns:\n",
+    "        The decoded first output sequence, with special tokens kept.\n",
+    "    \"\"\"\n",
+    "    chat = [\n",
+    "        {\"role\":\"user\", \"content\":prompt},\n",
+    "    ]\n",
+    "    inputs = tokenizer.apply_chat_template(chat, tokenize = False, add_generation_prompt = True)\n",
+    "    # Prepend BOS only when the rendered template does not already start with it.\n",
+    "    bos = tokenizer.bos_token\n",
+    "    if bos and not inputs.startswith(bos):\n",
+    "        inputs = bos + inputs\n",
+    "    print(\"inputs:\", inputs)\n",
+    "    # BOS is already in the text, so do not let encode() add special tokens again;\n",
+    "    # add_special_tokens=True produced a duplicated BOS id at the start of the prompt.\n",
+    "    inputs = tokenizer.encode(inputs, add_special_tokens=False, return_tensors=\"pt\")\n",
+    "    print(\"inputs encoded:\", inputs)\n",
+    "    # Move inputs to GPU\n",
+    "    inputs = inputs.to(\"cuda\")\n",
+    "    \n",
+    "    outputs = model.generate(input_ids=inputs, max_new_tokens = max_length, min_new_tokens = 500)\n",
+    "    # Decode the first (only) sequence; special tokens are kept in the returned text.\n",
+    "    outputs = tokenizer.decode(outputs[0], skip_special_tokens=False)\n",
+    "    return outputs\n",
+    "\n",
+    "\n",
+    "\n",
+    "prompt = \"Write a story about discovering a lost manuscript. It can be from a famous (or infamous) author, or an unknown one.\"\n",
+    "print(\"Prompt:\", prompt)\n",
+    "outputs = generate(model, tokenizer, prompt)\n",
+    "print('written by the model:', model_path)  \n",
+    "print(\"Generated story:\", outputs)\n",
+    "print(\"Length of the generated story:\", len(outputs.split()))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "20c32f2e-0da4-446c-a722-74ebef7eb508",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'model/SFTmodels/gemma-2b_sftm3genre10vast'"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Build the save path string from the run settings and display it.\n",
+    "# NOTE(review): `lease_likes` is presumably a typo for `least_likes`; it is defined\n",
+    "# in another cell, so the name is left unchanged here.\n",
+    "save_path = ''.join((\n",
+    "    'model/SFTmodels/',\n",
+    "    model_name.split('/')[-2],\n",
+    "    '_sft',\n",
+    "    mode,\n",
+    "    split_by,\n",
+    "    str(lease_likes),\n",
+    "    suffix,\n",
+    "))\n",
+    "save_path"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "859e0d8d-e677-4fca-981c-bca2590f2250",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'<pad>'"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "478d07be-fbfc-4ce1-841a-9345ff2a1cbd",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

Untitled1.ipynb ADDED Viewed

	@@ -0,0 +1,1519 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "10676e1f-9a7d-453f-9334-246ebb2142c9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from transformers import AutoTokenizer\n",
+    "\n",
+    "# Paths to the story directory and the story-pair CSV.\n",
+    "datapath = 'readsy/stories/'\n",
+    "pairpath = 'readsy/pairs/readsy_story_pairs0407.csv'\n",
+    "\n",
+    "# Load the tokenizer of the unsloth/gemma-2b-bnb-4bit checkpoint.\n",
+    "tokenizer = AutoTokenizer.from_pretrained(\n",
+    "    \"unsloth/gemma-2b-bnb-4bit\",\n",
+    ")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "7d73d0b5-7356-4c06-96bc-91dc807bcb0d",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "the total number of pairs is  100\n",
+      "the number of effective pairs is  91\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "No chat template is set for this tokenizer, falling back to a default class-level template. This is very error-prone, because models are often trained with templates different from the class default! Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which point any code depending on them will stop working. We recommend setting a valid chat template before then to ensure that this model continues working without issues.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Index(['prompt_id', 'prompt', 'story_id', 'story_title', 'story_author',\n",
+      "       'story_url', 'link', 'genre', 'is_sensitive', 'categories', 'likes',\n",
+      "       'story_text', 'posted_date', 'comments'],\n",
+      "      dtype='object')\n",
+      "the columns of train is  Index(['prompt_id', 'story1_id', 'story2_id', 'time_lag', 'least_likes'], dtype='object')\n",
+      "prompt_1234\n",
+      "78559    Write a story about someone who finds somethin...\n",
+      "78560    Write a story about someone who finds somethin...\n",
+      "78561    Write a story about someone who finds somethin...\n",
+      "78562    Write a story about someone who finds somethin...\n",
+      "78563    Write a story about someone who finds somethin...\n",
+      "                               ...                        \n",
+      "78644    Write a story about someone who finds somethin...\n",
+      "78645    Write a story about someone who finds somethin...\n",
+      "78646    Write a story about someone who finds somethin...\n",
+      "78647    Write a story about someone who finds somethin...\n",
+      "78648    Write a story about someone who finds somethin...\n",
+      "Name: prompt, Length: 90, dtype: object\n",
+      "prompt_0852\n",
+      "24702    End your story with a character standing in th...\n",
+      "24703    End your story with a character standing in th...\n",
+      "24704    End your story with a character standing in th...\n",
+      "24705    End your story with a character standing in th...\n",
+      "24706    End your story with a character standing in th...\n",
+      "                               ...                        \n",
+      "24799    End your story with a character standing in th...\n",
+      "24800    End your story with a character standing in th...\n",
+      "24801    End your story with a character standing in th...\n",
+      "24802    End your story with a character standing in th...\n",
+      "24803    End your story with a character standing in th...\n",
+      "Name: prompt, Length: 102, dtype: object\n",
+      "prompt_1153\n",
+      "66372    Write a story about someone who's haunted by t...\n",
+      "66373    Write a story about someone who's haunted by t...\n",
+      "66374    Write a story about someone who's haunted by t...\n",
+      "66375    Write a story about someone who's haunted by t...\n",
+      "66376    Write a story about someone who's haunted by t...\n",
+      "                               ...                        \n",
+      "66621    Write a story about someone who's haunted by t...\n",
+      "66622    Write a story about someone who's haunted by t...\n",
+      "66623    Write a story about someone who's haunted by t...\n",
+      "66624    Write a story about someone who's haunted by t...\n",
+      "66625    Write a story about someone who's haunted by t...\n",
+      "Name: prompt, Length: 254, dtype: object\n",
+      "prompt_1167\n",
+      "69390    Write a story that takes place in a waiting room.\n",
+      "69391    Write a story that takes place in a waiting room.\n",
+      "69392    Write a story that takes place in a waiting room.\n",
+      "69393    Write a story that takes place in a waiting room.\n",
+      "69394    Write a story that takes place in a waiting room.\n",
+      "                               ...                        \n",
+      "69643    Write a story that takes place in a waiting room.\n",
+      "69644    Write a story that takes place in a waiting room.\n",
+      "69645    Write a story that takes place in a waiting room.\n",
+      "69646    Write a story that takes place in a waiting room.\n",
+      "69647    Write a story that takes place in a waiting room.\n",
+      "Name: prompt, Length: 258, dtype: object\n",
+      "prompt_0901\n",
+      "28473    Frame your story as an adult recalling the eve...\n",
+      "28474    Frame your story as an adult recalling the eve...\n",
+      "28475    Frame your story as an adult recalling the eve...\n",
+      "28476    Frame your story as an adult recalling the eve...\n",
+      "28477    Frame your story as an adult recalling the eve...\n",
+      "                               ...                        \n",
+      "28714    Frame your story as an adult recalling the eve...\n",
+      "28715    Frame your story as an adult recalling the eve...\n",
+      "28716    Frame your story as an adult recalling the eve...\n",
+      "28717    Frame your story as an adult recalling the eve...\n",
+      "28718    Frame your story as an adult recalling the eve...\n",
+      "Name: prompt, Length: 246, dtype: object\n",
+      "prompt_1160\n",
+      "68088    Write a story about a person experiencing pre-...\n",
+      "68089    Write a story about a person experiencing pre-...\n",
+      "68090    Write a story about a person experiencing pre-...\n",
+      "68091    Write a story about a person experiencing pre-...\n",
+      "68092    Write a story about a person experiencing pre-...\n",
+      "                               ...                        \n",
+      "68245    Write a story about a person experiencing pre-...\n",
+      "68246    Write a story about a person experiencing pre-...\n",
+      "68247    Write a story about a person experiencing pre-...\n",
+      "68248    Write a story about a person experiencing pre-...\n",
+      "68249    Write a story about a person experiencing pre-...\n",
+      "Name: prompt, Length: 162, dtype: object\n",
+      "prompt_0712\n",
+      "18628    Start your story with the narrator or a charac...\n",
+      "18629    Start your story with the narrator or a charac...\n",
+      "18630    Start your story with the narrator or a charac...\n",
+      "18631    Start your story with the narrator or a charac...\n",
+      "18632    Start your story with the narrator or a charac...\n",
+      "                               ...                        \n",
+      "18726    Start your story with the narrator or a charac...\n",
+      "18727    Start your story with the narrator or a charac...\n",
+      "18728    Start your story with the narrator or a charac...\n",
+      "18729    Start your story with the narrator or a charac...\n",
+      "18730    Start your story with the narrator or a charac...\n",
+      "Name: prompt, Length: 103, dtype: object\n",
+      "prompt_0901\n",
+      "28473    Frame your story as an adult recalling the eve...\n",
+      "28474    Frame your story as an adult recalling the eve...\n",
+      "28475    Frame your story as an adult recalling the eve...\n",
+      "28476    Frame your story as an adult recalling the eve...\n",
+      "28477    Frame your story as an adult recalling the eve...\n",
+      "                               ...                        \n",
+      "28714    Frame your story as an adult recalling the eve...\n",
+      "28715    Frame your story as an adult recalling the eve...\n",
+      "28716    Frame your story as an adult recalling the eve...\n",
+      "28717    Frame your story as an adult recalling the eve...\n",
+      "28718    Frame your story as an adult recalling the eve...\n",
+      "Name: prompt, Length: 246, dtype: object\n",
+      "prompt_1120\n",
+      "60854    Write about a character who’s stuck in an elev...\n",
+      "60855    Write about a character who’s stuck in an elev...\n",
+      "60856    Write about a character who’s stuck in an elev...\n",
+      "60857    Write about a character who’s stuck in an elev...\n",
+      "60858    Write about a character who’s stuck in an elev...\n",
+      "                               ...                        \n",
+      "61103    Write about a character who’s stuck in an elev...\n",
+      "61104    Write about a character who’s stuck in an elev...\n",
+      "61105    Write about a character who’s stuck in an elev...\n",
+      "61106    Write about a character who’s stuck in an elev...\n",
+      "61107    Write about a character who’s stuck in an elev...\n",
+      "Name: prompt, Length: 254, dtype: object\n",
+      "prompt_1089\n",
+      "56188    Write about a family attempting to hide their ...\n",
+      "56189    Write about a family attempting to hide their ...\n",
+      "56190    Write about a family attempting to hide their ...\n",
+      "56191    Write about a family attempting to hide their ...\n",
+      "56192    Write about a family attempting to hide their ...\n",
+      "                               ...                        \n",
+      "56305    Write about a family attempting to hide their ...\n",
+      "56306    Write about a family attempting to hide their ...\n",
+      "56307    Write about a family attempting to hide their ...\n",
+      "56308    Write about a family attempting to hide their ...\n",
+      "56309    Write about a family attempting to hide their ...\n",
+      "Name: prompt, Length: 122, dtype: object\n",
+      "prompt_1087\n",
+      "55709    Start your story with two characters deciding ...\n",
+      "55710    Start your story with two characters deciding ...\n",
+      "55711    Start your story with two characters deciding ...\n",
+      "55712    Start your story with two characters deciding ...\n",
+      "55713    Start your story with two characters deciding ...\n",
+      "                               ...                        \n",
+      "56027    Start your story with two characters deciding ...\n",
+      "56028    Start your story with two characters deciding ...\n",
+      "56029    Start your story with two characters deciding ...\n",
+      "56030    Start your story with two characters deciding ...\n",
+      "56031    Start your story with two characters deciding ...\n",
+      "Name: prompt, Length: 323, dtype: object\n",
+      "prompt_1111\n",
+      "59385    Write a post-apocalyptic story that features z...\n",
+      "59386    Write a post-apocalyptic story that features z...\n",
+      "59387    Write a post-apocalyptic story that features z...\n",
+      "59388    Write a post-apocalyptic story that features z...\n",
+      "59389    Write a post-apocalyptic story that features z...\n",
+      "                               ...                        \n",
+      "59559    Write a post-apocalyptic story that features z...\n",
+      "59560    Write a post-apocalyptic story that features z...\n",
+      "59561    Write a post-apocalyptic story that features z...\n",
+      "59562    Write a post-apocalyptic story that features z...\n",
+      "59563    Write a post-apocalyptic story that features z...\n",
+      "Name: prompt, Length: 179, dtype: object\n",
+      "prompt_0937\n",
+      "34660    Start your story with someone being presented ...\n",
+      "34661    Start your story with someone being presented ...\n",
+      "34662    Start your story with someone being presented ...\n",
+      "34663    Start your story with someone being presented ...\n",
+      "34664    Start your story with someone being presented ...\n",
+      "                               ...                        \n",
+      "34876    Start your story with someone being presented ...\n",
+      "34877    Start your story with someone being presented ...\n",
+      "34878    Start your story with someone being presented ...\n",
+      "34879    Start your story with someone being presented ...\n",
+      "34880    Start your story with someone being presented ...\n",
+      "Name: prompt, Length: 221, dtype: object\n",
+      "prompt_1024\n",
+      "46870    Write a story set in the summer, when suddenly...\n",
+      "46871    Write a story set in the summer, when suddenly...\n",
+      "46872    Write a story set in the summer, when suddenly...\n",
+      "46873    Write a story set in the summer, when suddenly...\n",
+      "46874    Write a story set in the summer, when suddenly...\n",
+      "                               ...                        \n",
+      "47029    Write a story set in the summer, when suddenly...\n",
+      "47030    Write a story set in the summer, when suddenly...\n",
+      "47031    Write a story set in the summer, when suddenly...\n",
+      "47032    Write a story set in the summer, when suddenly...\n",
+      "47033    Write a story set in the summer, when suddenly...\n",
+      "Name: prompt, Length: 164, dtype: object\n",
+      "prompt_1091\n",
+      "56566    Write a ghost story where there’s more going o...\n",
+      "56567    Write a ghost story where there’s more going o...\n",
+      "56568    Write a ghost story where there’s more going o...\n",
+      "56569    Write a ghost story where there’s more going o...\n",
+      "56570    Write a ghost story where there’s more going o...\n",
+      "                               ...                        \n",
+      "56789    Write a ghost story where there’s more going o...\n",
+      "56790    Write a ghost story where there’s more going o...\n",
+      "56791    Write a ghost story where there’s more going o...\n",
+      "56792    Write a ghost story where there’s more going o...\n",
+      "56793    Write a ghost story where there’s more going o...\n",
+      "Name: prompt, Length: 228, dtype: object\n",
+      "prompt_0968\n",
+      "39373    Start the story with the absence of a sensory ...\n",
+      "39374    Start the story with the absence of a sensory ...\n",
+      "39375    Start the story with the absence of a sensory ...\n",
+      "39376    Start the story with the absence of a sensory ...\n",
+      "39377    Start the story with the absence of a sensory ...\n",
+      "39378    Start the story with the absence of a sensory ...\n",
+      "39379    Start the story with the absence of a sensory ...\n",
+      "39380    Start the story with the absence of a sensory ...\n",
+      "39381    Start the story with the absence of a sensory ...\n",
+      "39382    Start the story with the absence of a sensory ...\n",
+      "39383    Start the story with the absence of a sensory ...\n",
+      "39384    Start the story with the absence of a sensory ...\n",
+      "39385    Start the story with the absence of a sensory ...\n",
+      "39386    Start the story with the absence of a sensory ...\n",
+      "39387    Start the story with the absence of a sensory ...\n",
+      "39388    Start the story with the absence of a sensory ...\n",
+      "39389    Start the story with the absence of a sensory ...\n",
+      "39390    Start the story with the absence of a sensory ...\n",
+      "39391    Start the story with the absence of a sensory ...\n",
+      "39392    Start the story with the absence of a sensory ...\n",
+      "39393    Start the story with the absence of a sensory ...\n",
+      "39394    Start the story with the absence of a sensory ...\n",
+      "39395    Start the story with the absence of a sensory ...\n",
+      "39396    Start the story with the absence of a sensory ...\n",
+      "39397    Start the story with the absence of a sensory ...\n",
+      "39398    Start the story with the absence of a sensory ...\n",
+      "39399    Start the story with the absence of a sensory ...\n",
+      "39400    Start the story with the absence of a sensory ...\n",
+      "39401    Start the story with the absence of a sensory ...\n",
+      "39402    Start the story with the absence of a sensory ...\n",
+      "39403    Start the story with the absence of a sensory ...\n",
+      "39404    Start the story with the absence of a sensory ...\n",
+      "39405    Start the story with the absence of a sensory ...\n",
+      "39406    Start the story with the absence of a sensory ...\n",
+      "39407    Start the story with the absence of a sensory ...\n",
+      "39408    Start the story with the absence of a sensory ...\n",
+      "39409    Start the story with the absence of a sensory ...\n",
+      "39410    Start the story with the absence of a sensory ...\n",
+      "39411    Start the story with the absence of a sensory ...\n",
+      "39412    Start the story with the absence of a sensory ...\n",
+      "39413    Start the story with the absence of a sensory ...\n",
+      "39414    Start the story with the absence of a sensory ...\n",
+      "39415    Start the story with the absence of a sensory ...\n",
+      "39416    Start the story with the absence of a sensory ...\n",
+      "39417    Start the story with the absence of a sensory ...\n",
+      "39418    Start the story with the absence of a sensory ...\n",
+      "39419    Start the story with the absence of a sensory ...\n",
+      "39420    Start the story with the absence of a sensory ...\n",
+      "39421    Start the story with the absence of a sensory ...\n",
+      "39422    Start the story with the absence of a sensory ...\n",
+      "39423    Start the story with the absence of a sensory ...\n",
+      "39424    Start the story with the absence of a sensory ...\n",
+      "39425    Start the story with the absence of a sensory ...\n",
+      "Name: prompt, dtype: object\n",
+      "prompt_0117\n",
+      "6151    Write about someone stuck in an endless cycle ...\n",
+      "6152    Write about someone stuck in an endless cycle ...\n",
+      "6153    Write about someone stuck in an endless cycle ...\n",
+      "6154    Write about someone stuck in an endless cycle ...\n",
+      "6155    Write about someone stuck in an endless cycle ...\n",
+      "                              ...                        \n",
+      "6217    Write about someone stuck in an endless cycle ...\n",
+      "6218    Write about someone stuck in an endless cycle ...\n",
+      "6219    Write about someone stuck in an endless cycle ...\n",
+      "6220    Write about someone stuck in an endless cycle ...\n",
+      "6221    Write about someone stuck in an endless cycle ...\n",
+      "Name: prompt, Length: 71, dtype: object\n",
+      "prompt_1142\n",
+      "64471    Write a story about high school sweethearts co...\n",
+      "64472    Write a story about high school sweethearts co...\n",
+      "64473    Write a story about high school sweethearts co...\n",
+      "64474    Write a story about high school sweethearts co...\n",
+      "64475    Write a story about high school sweethearts co...\n",
+      "                               ...                        \n",
+      "64753    Write a story about high school sweethearts co...\n",
+      "64754    Write a story about high school sweethearts co...\n",
+      "64755    Write a story about high school sweethearts co...\n",
+      "64756    Write a story about high school sweethearts co...\n",
+      "64757    Write a story about high school sweethearts co...\n",
+      "Name: prompt, Length: 287, dtype: object\n",
+      "prompt_0146\n",
+      "7765    Write a story entirely of dialogue. Nothing bu...\n",
+      "7766    Write a story entirely of dialogue. Nothing bu...\n",
+      "7767    Write a story entirely of dialogue. Nothing bu...\n",
+      "7768    Write a story entirely of dialogue. Nothing bu...\n",
+      "7769    Write a story entirely of dialogue. Nothing bu...\n",
+      "                              ...                        \n",
+      "8044    Write a story entirely of dialogue. Nothing bu...\n",
+      "8045    Write a story entirely of dialogue. Nothing bu...\n",
+      "8046    Write a story entirely of dialogue. Nothing bu...\n",
+      "8047    Write a story entirely of dialogue. Nothing bu...\n",
+      "8048    Write a story entirely of dialogue. Nothing bu...\n",
+      "Name: prompt, Length: 284, dtype: object\n",
+      "prompt_1005\n",
+      "43705    Write about a first date that surprises both p...\n",
+      "43706    Write about a first date that surprises both p...\n",
+      "43707    Write about a first date that surprises both p...\n",
+      "43708    Write about a first date that surprises both p...\n",
+      "43709    Write about a first date that surprises both p...\n",
+      "                               ...                        \n",
+      "43862    Write about a first date that surprises both p...\n",
+      "43863    Write about a first date that surprises both p...\n",
+      "43864    Write about a first date that surprises both p...\n",
+      "43865    Write about a first date that surprises both p...\n",
+      "43866    Write about a first date that surprises both p...\n",
+      "Name: prompt, Length: 162, dtype: object\n",
+      "prompt_1084\n",
+      "55202    Write about a vampire or werewolf who moves in...\n",
+      "55203    Write about a vampire or werewolf who moves in...\n",
+      "55204    Write about a vampire or werewolf who moves in...\n",
+      "55205    Write about a vampire or werewolf who moves in...\n",
+      "55206    Write about a vampire or werewolf who moves in...\n",
+      "                               ...                        \n",
+      "55359    Write about a vampire or werewolf who moves in...\n",
+      "55360    Write about a vampire or werewolf who moves in...\n",
+      "55361    Write about a vampire or werewolf who moves in...\n",
+      "55362    Write about a vampire or werewolf who moves in...\n",
+      "55363    Write about a vampire or werewolf who moves in...\n",
+      "Name: prompt, Length: 162, dtype: object\n",
+      "prompt_1172\n",
+      "69960    Write about someone who has a superpower.\n",
+      "69961    Write about someone who has a superpower.\n",
+      "69962    Write about someone who has a superpower.\n",
+      "69963    Write about someone who has a superpower.\n",
+      "69964    Write about someone who has a superpower.\n",
+      "                           ...                    \n",
+      "70292    Write about someone who has a superpower.\n",
+      "70293    Write about someone who has a superpower.\n",
+      "70294    Write about someone who has a superpower.\n",
+      "70295    Write about someone who has a superpower.\n",
+      "70296    Write about someone who has a superpower.\n",
+      "Name: prompt, Length: 337, dtype: object\n",
+      "prompt_0944\n",
+      "35521    Write your story about two characters tidying ...\n",
+      "35522    Write your story about two characters tidying ...\n",
+      "35523    Write your story about two characters tidying ...\n",
+      "35524    Write your story about two characters tidying ...\n",
+      "35525    Write your story about two characters tidying ...\n",
+      "                               ...                        \n",
+      "35668    Write your story about two characters tidying ...\n",
+      "35669    Write your story about two characters tidying ...\n",
+      "35670    Write your story about two characters tidying ...\n",
+      "35671    Write your story about two characters tidying ...\n",
+      "35672    Write your story about two characters tidying ...\n",
+      "Name: prompt, Length: 152, dtype: object\n",
+      "prompt_1005\n",
+      "43705    Write about a first date that surprises both p...\n",
+      "43706    Write about a first date that surprises both p...\n",
+      "43707    Write about a first date that surprises both p...\n",
+      "43708    Write about a first date that surprises both p...\n",
+      "43709    Write about a first date that surprises both p...\n",
+      "                               ...                        \n",
+      "43862    Write about a first date that surprises both p...\n",
+      "43863    Write about a first date that surprises both p...\n",
+      "43864    Write about a first date that surprises both p...\n",
+      "43865    Write about a first date that surprises both p...\n",
+      "43866    Write about a first date that surprises both p...\n",
+      "Name: prompt, Length: 162, dtype: object\n",
+      "prompt_1027\n",
+      "47435    Write about two people going sledding for the ...\n",
+      "47436    Write about two people going sledding for the ...\n",
+      "47437    Write about two people going sledding for the ...\n",
+      "47438    Write about two people going sledding for the ...\n",
+      "47439    Write about two people going sledding for the ...\n",
+      "                               ...                        \n",
+      "47561    Write about two people going sledding for the ...\n",
+      "47562    Write about two people going sledding for the ...\n",
+      "47563    Write about two people going sledding for the ...\n",
+      "47564    Write about two people going sledding for the ...\n",
+      "47565    Write about two people going sledding for the ...\n",
+      "Name: prompt, Length: 131, dtype: object\n",
+      "prompt_0985\n",
+      "40711    Start your story with the line, “That’s the th...\n",
+      "40712    Start your story with the line, “That’s the th...\n",
+      "40713    Start your story with the line, “That’s the th...\n",
+      "40714    Start your story with the line, “That’s the th...\n",
+      "40715    Start your story with the line, “That’s the th...\n",
+      "                               ...                        \n",
+      "40992    Start your story with the line, “That’s the th...\n",
+      "40993    Start your story with the line, “That’s the th...\n",
+      "40994    Start your story with the line, “That’s the th...\n",
+      "40995    Start your story with the line, “That’s the th...\n",
+      "40996    Start your story with the line, “That’s the th...\n",
+      "Name: prompt, Length: 286, dtype: object\n",
+      "prompt_0985\n",
+      "40711    Start your story with the line, “That’s the th...\n",
+      "40712    Start your story with the line, “That’s the th...\n",
+      "40713    Start your story with the line, “That’s the th...\n",
+      "40714    Start your story with the line, “That’s the th...\n",
+      "40715    Start your story with the line, “That’s the th...\n",
+      "                               ...                        \n",
+      "40992    Start your story with the line, “That’s the th...\n",
+      "40993    Start your story with the line, “That’s the th...\n",
+      "40994    Start your story with the line, “That’s the th...\n",
+      "40995    Start your story with the line, “That’s the th...\n",
+      "40996    Start your story with the line, “That’s the th...\n",
+      "Name: prompt, Length: 286, dtype: object\n",
+      "prompt_1069\n",
+      "52899    Start your story with the line, “This was supp...\n",
+      "52900    Start your story with the line, “This was supp...\n",
+      "52901    Start your story with the line, “This was supp...\n",
+      "52902    Start your story with the line, “This was supp...\n",
+      "52903    Start your story with the line, “This was supp...\n",
+      "                               ...                        \n",
+      "53026    Start your story with the line, “This was supp...\n",
+      "53027    Start your story with the line, “This was supp...\n",
+      "53028    Start your story with the line, “This was supp...\n",
+      "53029    Start your story with the line, “This was supp...\n",
+      "53030    Start your story with the line, “This was supp...\n",
+      "Name: prompt, Length: 132, dtype: object\n",
+      "prompt_1150\n",
+      "65712    You thought he was dead, but there he is, righ...\n",
+      "65713    You thought he was dead, but there he is, righ...\n",
+      "65714    You thought he was dead, but there he is, righ...\n",
+      "65715    You thought he was dead, but there he is, righ...\n",
+      "65716    You thought he was dead, but there he is, righ...\n",
+      "                               ...                        \n",
+      "66127    You thought he was dead, but there he is, righ...\n",
+      "66128    You thought he was dead, but there he is, righ...\n",
+      "66129    You thought he was dead, but there he is, righ...\n",
+      "66130    You thought he was dead, but there he is, righ...\n",
+      "66131    You thought he was dead, but there he is, righ...\n",
+      "Name: prompt, Length: 420, dtype: object\n",
+      "prompt_0953\n",
+      "37637    Write about a character stumbling upon a libra...\n",
+      "37638    Write about a character stumbling upon a libra...\n",
+      "37639    Write about a character stumbling upon a libra...\n",
+      "37640    Write about a character stumbling upon a libra...\n",
+      "37641    Write about a character stumbling upon a libra...\n",
+      "                               ...                        \n",
+      "37842    Write about a character stumbling upon a libra...\n",
+      "37843    Write about a character stumbling upon a libra...\n",
+      "37844    Write about a character stumbling upon a libra...\n",
+      "37845    Write about a character stumbling upon a libra...\n",
+      "37846    Write about a character stumbling upon a libra...\n",
+      "Name: prompt, Length: 210, dtype: object\n",
+      "prompt_0919\n",
+      "31610    Set your story on (or in) a winding river.\n",
+      "31611    Set your story on (or in) a winding river.\n",
+      "31612    Set your story on (or in) a winding river.\n",
+      "31613    Set your story on (or in) a winding river.\n",
+      "31614    Set your story on (or in) a winding river.\n",
+      "                            ...                    \n",
+      "31730    Set your story on (or in) a winding river.\n",
+      "31731    Set your story on (or in) a winding river.\n",
+      "31732    Set your story on (or in) a winding river.\n",
+      "31733    Set your story on (or in) a winding river.\n",
+      "31734    Set your story on (or in) a winding river.\n",
+      "Name: prompt, Length: 125, dtype: object\n",
+      "prompt_0952\n",
+      "36994    Write a story that begins in the light and end...\n",
+      "36995    Write a story that begins in the light and end...\n",
+      "36996    Write a story that begins in the light and end...\n",
+      "36997    Write a story that begins in the light and end...\n",
+      "36998    Write a story that begins in the light and end...\n",
+      "                               ...                        \n",
+      "37376    Write a story that begins in the light and end...\n",
+      "37377    Write a story that begins in the light and end...\n",
+      "37378    Write a story that begins in the light and end...\n",
+      "37379    Write a story that begins in the light and end...\n",
+      "37380    Write a story that begins in the light and end...\n",
+      "Name: prompt, Length: 387, dtype: object\n",
+      "prompt_1160\n",
+      "68088    Write a story about a person experiencing pre-...\n",
+      "68089    Write a story about a person experiencing pre-...\n",
+      "68090    Write a story about a person experiencing pre-...\n",
+      "68091    Write a story about a person experiencing pre-...\n",
+      "68092    Write a story about a person experiencing pre-...\n",
+      "                               ...                        \n",
+      "68245    Write a story about a person experiencing pre-...\n",
+      "68246    Write a story about a person experiencing pre-...\n",
+      "68247    Write a story about a person experiencing pre-...\n",
+      "68248    Write a story about a person experiencing pre-...\n",
+      "68249    Write a story about a person experiencing pre-...\n",
+      "Name: prompt, Length: 162, dtype: object\n",
+      "prompt_1176\n",
+      "71095    \"Just say it,\" you silently reminded yourself....\n",
+      "71096    \"Just say it,\" you silently reminded yourself....\n",
+      "71097    \"Just say it,\" you silently reminded yourself....\n",
+      "71098    \"Just say it,\" you silently reminded yourself....\n",
+      "71099    \"Just say it,\" you silently reminded yourself....\n",
+      "                               ...                        \n",
+      "71311    \"Just say it,\" you silently reminded yourself....\n",
+      "71312    \"Just say it,\" you silently reminded yourself....\n",
+      "71313    \"Just say it,\" you silently reminded yourself....\n",
+      "71314    \"Just say it,\" you silently reminded yourself....\n",
+      "71315    \"Just say it,\" you silently reminded yourself....\n",
+      "Name: prompt, Length: 221, dtype: object\n",
+      "prompt_1116\n",
+      "60115    Write about a character arriving in a place un...\n",
+      "60116    Write about a character arriving in a place un...\n",
+      "60117    Write about a character arriving in a place un...\n",
+      "60118    Write about a character arriving in a place un...\n",
+      "60119    Write about a character arriving in a place un...\n",
+      "                               ...                        \n",
+      "60311    Write about a character arriving in a place un...\n",
+      "60312    Write about a character arriving in a place un...\n",
+      "60313    Write about a character arriving in a place un...\n",
+      "60314    Write about a character arriving in a place un...\n",
+      "60315    Write about a character arriving in a place un...\n",
+      "Name: prompt, Length: 201, dtype: object\n",
+      "prompt_1115\n",
+      "60046    Set your story in a place with extreme weather...\n",
+      "60047    Set your story in a place with extreme weather...\n",
+      "60048    Set your story in a place with extreme weather...\n",
+      "60049    Set your story in a place with extreme weather...\n",
+      "60050    Set your story in a place with extreme weather...\n",
+      "                               ...                        \n",
+      "60110    Set your story in a place with extreme weather...\n",
+      "60111    Set your story in a place with extreme weather...\n",
+      "60112    Set your story in a place with extreme weather...\n",
+      "60113    Set your story in a place with extreme weather...\n",
+      "60114    Set your story in a place with extreme weather...\n",
+      "Name: prompt, Length: 69, dtype: object\n",
+      "prompt_1165\n",
+      "68891    Write a story about waiting — but don't reveal...\n",
+      "68892    Write a story about waiting — but don't reveal...\n",
+      "68893    Write a story about waiting — but don't reveal...\n",
+      "68894    Write a story about waiting — but don't reveal...\n",
+      "68895    Write a story about waiting — but don't reveal...\n",
+      "                               ...                        \n",
+      "69211    Write a story about waiting — but don't reveal...\n",
+      "69212    Write a story about waiting — but don't reveal...\n",
+      "69213    Write a story about waiting — but don't reveal...\n",
+      "69214    Write a story about waiting — but don't reveal...\n",
+      "69215    Write a story about waiting — but don't reveal...\n",
+      "Name: prompt, Length: 325, dtype: object\n",
+      "prompt_1077\n",
+      "54378    Write about a pirate captain obsessed with fin...\n",
+      "54379    Write about a pirate captain obsessed with fin...\n",
+      "54380    Write about a pirate captain obsessed with fin...\n",
+      "54381    Write about a pirate captain obsessed with fin...\n",
+      "54382    Write about a pirate captain obsessed with fin...\n",
+      "                               ...                        \n",
+      "54518    Write about a pirate captain obsessed with fin...\n",
+      "54519    Write about a pirate captain obsessed with fin...\n",
+      "54520    Write about a pirate captain obsessed with fin...\n",
+      "54521    Write about a pirate captain obsessed with fin...\n",
+      "54522    Write about a pirate captain obsessed with fin...\n",
+      "Name: prompt, Length: 145, dtype: object\n",
+      "prompt_0931\n",
+      "33661    Start your story with the arrival of a strange...\n",
+      "33662    Start your story with the arrival of a strange...\n",
+      "33663    Start your story with the arrival of a strange...\n",
+      "33664    Start your story with the arrival of a strange...\n",
+      "33665    Start your story with the arrival of a strange...\n",
+      "                               ...                        \n",
+      "33846    Start your story with the arrival of a strange...\n",
+      "33847    Start your story with the arrival of a strange...\n",
+      "33848    Start your story with the arrival of a strange...\n",
+      "33849    Start your story with the arrival of a strange...\n",
+      "33850    Start your story with the arrival of a strange...\n",
+      "Name: prompt, Length: 190, dtype: object\n",
+      "prompt_0960\n",
+      "38500    Set your story in a world living with the cons...\n",
+      "38501    Set your story in a world living with the cons...\n",
+      "38502    Set your story in a world living with the cons...\n",
+      "38503    Set your story in a world living with the cons...\n",
+      "38504    Set your story in a world living with the cons...\n",
+      "                               ...                        \n",
+      "38630    Set your story in a world living with the cons...\n",
+      "38631    Set your story in a world living with the cons...\n",
+      "38632    Set your story in a world living with the cons...\n",
+      "38633    Set your story in a world living with the cons...\n",
+      "38634    Set your story in a world living with the cons...\n",
+      "Name: prompt, Length: 135, dtype: object\n",
+      "prompt_1189\n",
+      "72488    Write a story that starts with a character-rev...\n",
+      "72489    Write a story that starts with a character-rev...\n",
+      "72490    Write a story that starts with a character-rev...\n",
+      "72491    Write a story that starts with a character-rev...\n",
+      "72492    Write a story that starts with a character-rev...\n",
+      "                               ...                        \n",
+      "72626    Write a story that starts with a character-rev...\n",
+      "72627    Write a story that starts with a character-rev...\n",
+      "72628    Write a story that starts with a character-rev...\n",
+      "72629    Write a story that starts with a character-rev...\n",
+      "72630    Write a story that starts with a character-rev...\n",
+      "Name: prompt, Length: 143, dtype: object\n",
+      "prompt_0146\n",
+      "7765    Write a story entirely of dialogue. Nothing bu...\n",
+      "7766    Write a story entirely of dialogue. Nothing bu...\n",
+      "7767    Write a story entirely of dialogue. Nothing bu...\n",
+      "7768    Write a story entirely of dialogue. Nothing bu...\n",
+      "7769    Write a story entirely of dialogue. Nothing bu...\n",
+      "                              ...                        \n",
+      "8044    Write a story entirely of dialogue. Nothing bu...\n",
+      "8045    Write a story entirely of dialogue. Nothing bu...\n",
+      "8046    Write a story entirely of dialogue. Nothing bu...\n",
+      "8047    Write a story entirely of dialogue. Nothing bu...\n",
+      "8048    Write a story entirely of dialogue. Nothing bu...\n",
+      "Name: prompt, Length: 284, dtype: object\n",
+      "prompt_0932\n",
+      "33393    Write a story about strangers becoming friends...\n",
+      "33394    Write a story about strangers becoming friends...\n",
+      "33395    Write a story about strangers becoming friends...\n",
+      "33396    Write a story about strangers becoming friends...\n",
+      "33397    Write a story about strangers becoming friends...\n",
+      "                               ...                        \n",
+      "33656    Write a story about strangers becoming friends...\n",
+      "33657    Write a story about strangers becoming friends...\n",
+      "33658    Write a story about strangers becoming friends...\n",
+      "33659    Write a story about strangers becoming friends...\n",
+      "33660    Write a story about strangers becoming friends...\n",
+      "Name: prompt, Length: 268, dtype: object\n",
+      "prompt_1142\n",
+      "64471    Write a story about high school sweethearts co...\n",
+      "64472    Write a story about high school sweethearts co...\n",
+      "64473    Write a story about high school sweethearts co...\n",
+      "64474    Write a story about high school sweethearts co...\n",
+      "64475    Write a story about high school sweethearts co...\n",
+      "                               ...                        \n",
+      "64753    Write a story about high school sweethearts co...\n",
+      "64754    Write a story about high school sweethearts co...\n",
+      "64755    Write a story about high school sweethearts co...\n",
+      "64756    Write a story about high school sweethearts co...\n",
+      "64757    Write a story about high school sweethearts co...\n",
+      "Name: prompt, Length: 287, dtype: object\n",
+      "prompt_1096\n",
+      "57385    Start your story with the line, “By the time I...\n",
+      "57386    Start your story with the line, “By the time I...\n",
+      "57387    Start your story with the line, “By the time I...\n",
+      "57388    Start your story with the line, “By the time I...\n",
+      "57389    Start your story with the line, “By the time I...\n",
+      "                               ...                        \n",
+      "57706    Start your story with the line, “By the time I...\n",
+      "57707    Start your story with the line, “By the time I...\n",
+      "57708    Start your story with the line, “By the time I...\n",
+      "57709    Start your story with the line, “By the time I...\n",
+      "57710    Start your story with the line, “By the time I...\n",
+      "Name: prompt, Length: 326, dtype: object\n",
+      "prompt_1177\n",
+      "70881    As you check your mail, you notice a letter th...\n",
+      "70882    As you check your mail, you notice a letter th...\n",
+      "70883    As you check your mail, you notice a letter th...\n",
+      "70884    As you check your mail, you notice a letter th...\n",
+      "70885    As you check your mail, you notice a letter th...\n",
+      "                               ...                        \n",
+      "71090    As you check your mail, you notice a letter th...\n",
+      "71091    As you check your mail, you notice a letter th...\n",
+      "71092    As you check your mail, you notice a letter th...\n",
+      "71093    As you check your mail, you notice a letter th...\n",
+      "71094    As you check your mail, you notice a letter th...\n",
+      "Name: prompt, Length: 214, dtype: object\n",
+      "prompt_1168\n",
+      "69648    Write a story that features a protagonist with...\n",
+      "69649    Write a story that features a protagonist with...\n",
+      "69650    Write a story that features a protagonist with...\n",
+      "69651    Write a story that features a protagonist with...\n",
+      "69652    Write a story that features a protagonist with...\n",
+      "                               ...                        \n",
+      "69756    Write a story that features a protagonist with...\n",
+      "69757    Write a story that features a protagonist with...\n",
+      "69758    Write a story that features a protagonist with...\n",
+      "69759    Write a story that features a protagonist with...\n",
+      "69760    Write a story that features a protagonist with...\n",
+      "Name: prompt, Length: 113, dtype: object\n",
+      "prompt_1158\n",
+      "67634    Write a story about a summer afternoon spent i...\n",
+      "67635    Write a story about a summer afternoon spent i...\n",
+      "67636    Write a story about a summer afternoon spent i...\n",
+      "67637    Write a story about a summer afternoon spent i...\n",
+      "67638    Write a story about a summer afternoon spent i...\n",
+      "                               ...                        \n",
+      "67937    Write a story about a summer afternoon spent i...\n",
+      "67938    Write a story about a summer afternoon spent i...\n",
+      "67939    Write a story about a summer afternoon spent i...\n",
+      "67940    Write a story about a summer afternoon spent i...\n",
+      "67941    Write a story about a summer afternoon spent i...\n",
+      "Name: prompt, Length: 308, dtype: object\n",
+      "prompt_0937\n",
+      "34660    Start your story with someone being presented ...\n",
+      "34661    Start your story with someone being presented ...\n",
+      "34662    Start your story with someone being presented ...\n",
+      "34663    Start your story with someone being presented ...\n",
+      "34664    Start your story with someone being presented ...\n",
+      "                               ...                        \n",
+      "34876    Start your story with someone being presented ...\n",
+      "34877    Start your story with someone being presented ...\n",
+      "34878    Start your story with someone being presented ...\n",
+      "34879    Start your story with someone being presented ...\n",
+      "34880    Start your story with someone being presented ...\n",
+      "Name: prompt, Length: 221, dtype: object\n",
+      "prompt_0044\n",
+      "2338    Write about a person who constantly has to put...\n",
+      "2339    Write about a person who constantly has to put...\n",
+      "2340    Write about a person who constantly has to put...\n",
+      "2341    Write about a person who constantly has to put...\n",
+      "2342    Write about a person who constantly has to put...\n",
+      "                              ...                        \n",
+      "2416    Write about a person who constantly has to put...\n",
+      "2417    Write about a person who constantly has to put...\n",
+      "2418    Write about a person who constantly has to put...\n",
+      "2419    Write about a person who constantly has to put...\n",
+      "2420    Write about a person who constantly has to put...\n",
+      "Name: prompt, Length: 83, dtype: object\n",
+      "prompt_1142\n",
+      "64471    Write a story about high school sweethearts co...\n",
+      "64472    Write a story about high school sweethearts co...\n",
+      "64473    Write a story about high school sweethearts co...\n",
+      "64474    Write a story about high school sweethearts co...\n",
+      "64475    Write a story about high school sweethearts co...\n",
+      "                               ...                        \n",
+      "64753    Write a story about high school sweethearts co...\n",
+      "64754    Write a story about high school sweethearts co...\n",
+      "64755    Write a story about high school sweethearts co...\n",
+      "64756    Write a story about high school sweethearts co...\n",
+      "64757    Write a story about high school sweethearts co...\n",
+      "Name: prompt, Length: 287, dtype: object\n",
+      "prompt_1167\n",
+      "69390    Write a story that takes place in a waiting room.\n",
+      "69391    Write a story that takes place in a waiting room.\n",
+      "69392    Write a story that takes place in a waiting room.\n",
+      "69393    Write a story that takes place in a waiting room.\n",
+      "69394    Write a story that takes place in a waiting room.\n",
+      "                               ...                        \n",
+      "69643    Write a story that takes place in a waiting room.\n",
+      "69644    Write a story that takes place in a waiting room.\n",
+      "69645    Write a story that takes place in a waiting room.\n",
+      "69646    Write a story that takes place in a waiting room.\n",
+      "69647    Write a story that takes place in a waiting room.\n",
+      "Name: prompt, Length: 258, dtype: object\n",
+      "prompt_1274\n",
+      "81742    Write a short story that ends with a twist.\n",
+      "81743    Write a short story that ends with a twist.\n",
+      "81744    Write a short story that ends with a twist.\n",
+      "81745    Write a short story that ends with a twist.\n",
+      "81746    Write a short story that ends with a twist.\n",
+      "                            ...                     \n",
+      "81894    Write a short story that ends with a twist.\n",
+      "81895    Write a short story that ends with a twist.\n",
+      "81896    Write a short story that ends with a twist.\n",
+      "81897    Write a short story that ends with a twist.\n",
+      "81898    Write a short story that ends with a twist.\n",
+      "Name: prompt, Length: 157, dtype: object\n",
+      "prompt_1017\n",
+      "45613    Write about someone who decides it’s time to c...\n",
+      "45614    Write about someone who decides it’s time to c...\n",
+      "45615    Write about someone who decides it’s time to c...\n",
+      "45616    Write about someone who decides it’s time to c...\n",
+      "45617    Write about someone who decides it’s time to c...\n",
+      "                               ...                        \n",
+      "45873    Write about someone who decides it’s time to c...\n",
+      "45874    Write about someone who decides it’s time to c...\n",
+      "45875    Write about someone who decides it’s time to c...\n",
+      "45876    Write about someone who decides it’s time to c...\n",
+      "45877    Write about someone who decides it’s time to c...\n",
+      "Name: prompt, Length: 265, dtype: object\n",
+      "prompt_0638\n",
+      "15353    Write a story featuring an element of time-tra...\n",
+      "15354    Write a story featuring an element of time-tra...\n",
+      "15355    Write a story featuring an element of time-tra...\n",
+      "15356    Write a story featuring an element of time-tra...\n",
+      "15357    Write a story featuring an element of time-tra...\n",
+      "                               ...                        \n",
+      "15416    Write a story featuring an element of time-tra...\n",
+      "15417    Write a story featuring an element of time-tra...\n",
+      "15418    Write a story featuring an element of time-tra...\n",
+      "15419    Write a story featuring an element of time-tra...\n",
+      "15420    Write a story featuring an element of time-tra...\n",
+      "Name: prompt, Length: 68, dtype: object\n",
+      "prompt_1197\n",
+      "73639    Write a story about transformation.\n",
+      "73640    Write a story about transformation.\n",
+      "73641    Write a story about transformation.\n",
+      "73642    Write a story about transformation.\n",
+      "73643    Write a story about transformation.\n",
+      "                        ...                 \n",
+      "73807    Write a story about transformation.\n",
+      "73808    Write a story about transformation.\n",
+      "73809    Write a story about transformation.\n",
+      "73810    Write a story about transformation.\n",
+      "73811    Write a story about transformation.\n",
+      "Name: prompt, Length: 173, dtype: object\n",
+      "prompt_0935\n",
+      "34213    A character stands in front of two doors. Writ...\n",
+      "34214    A character stands in front of two doors. Writ...\n",
+      "34215    A character stands in front of two doors. Writ...\n",
+      "34216    A character stands in front of two doors. Writ...\n",
+      "34217    A character stands in front of two doors. Writ...\n",
+      "                               ...                        \n",
+      "34448    A character stands in front of two doors. Writ...\n",
+      "34449    A character stands in front of two doors. Writ...\n",
+      "34450    A character stands in front of two doors. Writ...\n",
+      "34451    A character stands in front of two doors. Writ...\n",
+      "34452    A character stands in front of two doors. Writ...\n",
+      "Name: prompt, Length: 240, dtype: object\n",
+      "prompt_0620\n",
+      "14572    Write a story where the law plays an important...\n",
+      "14573    Write a story where the law plays an important...\n",
+      "14574    Write a story where the law plays an important...\n",
+      "14575    Write a story where the law plays an important...\n",
+      "14576    Write a story where the law plays an important...\n",
+      "14577    Write a story where the law plays an important...\n",
+      "14578    Write a story where the law plays an important...\n",
+      "14579    Write a story where the law plays an important...\n",
+      "14580    Write a story where the law plays an important...\n",
+      "14581    Write a story where the law plays an important...\n",
+      "14582    Write a story where the law plays an important...\n",
+      "14583    Write a story where the law plays an important...\n",
+      "14584    Write a story where the law plays an important...\n",
+      "14585    Write a story where the law plays an important...\n",
+      "14586    Write a story where the law plays an important...\n",
+      "14587    Write a story where the law plays an important...\n",
+      "14588    Write a story where the law plays an important...\n",
+      "14589    Write a story where the law plays an important...\n",
+      "14590    Write a story where the law plays an important...\n",
+      "14591    Write a story where the law plays an important...\n",
+      "14592    Write a story where the law plays an important...\n",
+      "14593    Write a story where the law plays an important...\n",
+      "14594    Write a story where the law plays an important...\n",
+      "14595    Write a story where the law plays an important...\n",
+      "Name: prompt, dtype: object\n",
+      "prompt_0923\n",
+      "32047    Set your story within a window of opportunity,...\n",
+      "32048    Set your story within a window of opportunity,...\n",
+      "32049    Set your story within a window of opportunity,...\n",
+      "32050    Set your story within a window of opportunity,...\n",
+      "32051    Set your story within a window of opportunity,...\n",
+      "                               ...                        \n",
+      "32189    Set your story within a window of opportunity,...\n",
+      "32190    Set your story within a window of opportunity,...\n",
+      "32191    Set your story within a window of opportunity,...\n",
+      "32192    Set your story within a window of opportunity,...\n",
+      "32193    Set your story within a window of opportunity,...\n",
+      "Name: prompt, Length: 147, dtype: object\n",
+      "prompt_0912\n",
+      "29891    Start or end your story with two characters si...\n",
+      "29892    Start or end your story with two characters si...\n",
+      "29893    Start or end your story with two characters si...\n",
+      "29894    Start or end your story with two characters si...\n",
+      "29895    Start or end your story with two characters si...\n",
+      "                               ...                        \n",
+      "30153    Start or end your story with two characters si...\n",
+      "30154    Start or end your story with two characters si...\n",
+      "30155    Start or end your story with two characters si...\n",
+      "30156    Start or end your story with two characters si...\n",
+      "30157    Start or end your story with two characters si...\n",
+      "Name: prompt, Length: 267, dtype: object\n",
+      "prompt_1032\n",
+      "48007    Write a story told exclusively through dialogue.\n",
+      "48008    Write a story told exclusively through dialogue.\n",
+      "48009    Write a story told exclusively through dialogue.\n",
+      "48010    Write a story told exclusively through dialogue.\n",
+      "48011    Write a story told exclusively through dialogue.\n",
+      "                               ...                       \n",
+      "48356    Write a story told exclusively through dialogue.\n",
+      "48357    Write a story told exclusively through dialogue.\n",
+      "48358    Write a story told exclusively through dialogue.\n",
+      "48359    Write a story told exclusively through dialogue.\n",
+      "48360    Write a story told exclusively through dialogue.\n",
+      "Name: prompt, Length: 354, dtype: object\n",
+      "prompt_1225\n",
+      "77452    Write a story that takes place in the woods.\n",
+      "77453    Write a story that takes place in the woods.\n",
+      "77454    Write a story that takes place in the woods.\n",
+      "77455    Write a story that takes place in the woods.\n",
+      "77456    Write a story that takes place in the woods.\n",
+      "                             ...                     \n",
+      "77628    Write a story that takes place in the woods.\n",
+      "77629    Write a story that takes place in the woods.\n",
+      "77630    Write a story that takes place in the woods.\n",
+      "77631    Write a story that takes place in the woods.\n",
+      "77632    Write a story that takes place in the woods.\n",
+      "Name: prompt, Length: 181, dtype: object\n",
+      "prompt_0958\n",
+      "38022    Start your story with someone sitting on a cro...\n",
+      "38023    Start your story with someone sitting on a cro...\n",
+      "38024    Start your story with someone sitting on a cro...\n",
+      "38025    Start your story with someone sitting on a cro...\n",
+      "38026    Start your story with someone sitting on a cro...\n",
+      "                               ...                        \n",
+      "38279    Start your story with someone sitting on a cro...\n",
+      "38280    Start your story with someone sitting on a cro...\n",
+      "38281    Start your story with someone sitting on a cro...\n",
+      "38282    Start your story with someone sitting on a cro...\n",
+      "38283    Start your story with someone sitting on a cro...\n",
+      "Name: prompt, Length: 262, dtype: object\n",
+      "prompt_1013\n",
+      "44965    Write about a “found family” who are finally a...\n",
+      "44966    Write about a “found family” who are finally a...\n",
+      "44967    Write about a “found family” who are finally a...\n",
+      "44968    Write about a “found family” who are finally a...\n",
+      "44969    Write about a “found family” who are finally a...\n",
+      "                               ...                        \n",
+      "45061    Write about a “found family” who are finally a...\n",
+      "45062    Write about a “found family” who are finally a...\n",
+      "45063    Write about a “found family” who are finally a...\n",
+      "45064    Write about a “found family” who are finally a...\n",
+      "45065    Write about a “found family” who are finally a...\n",
+      "Name: prompt, Length: 101, dtype: object\n",
+      "prompt_0998\n",
+      "42817    Write about an android just trying to blend in...\n",
+      "42818    Write about an android just trying to blend in...\n",
+      "42819    Write about an android just trying to blend in...\n",
+      "42820    Write about an android just trying to blend in...\n",
+      "42821    Write about an android just trying to blend in...\n",
+      "                               ...                        \n",
+      "43037    Write about an android just trying to blend in...\n",
+      "43038    Write about an android just trying to blend in...\n",
+      "43039    Write about an android just trying to blend in...\n",
+      "43040    Write about an android just trying to blend in...\n",
+      "43041    Write about an android just trying to blend in...\n",
+      "Name: prompt, Length: 225, dtype: object\n",
+      "prompt_0986\n",
+      "40997    Write a story that takes place in the same bui...\n",
+      "40998    Write a story that takes place in the same bui...\n",
+      "40999    Write a story that takes place in the same bui...\n",
+      "41000    Write a story that takes place in the same bui...\n",
+      "41001    Write a story that takes place in the same bui...\n",
+      "                               ...                        \n",
+      "41148    Write a story that takes place in the same bui...\n",
+      "41149    Write a story that takes place in the same bui...\n",
+      "41150    Write a story that takes place in the same bui...\n",
+      "41151    Write a story that takes place in the same bui...\n",
+      "41152    Write a story that takes place in the same bui...\n",
+      "Name: prompt, Length: 156, dtype: object\n",
+      "prompt_0980\n",
+      "40220    Write a fairy tale about someone who can commu...\n",
+      "40221    Write a fairy tale about someone who can commu...\n",
+      "40222    Write a fairy tale about someone who can commu...\n",
+      "40223    Write a fairy tale about someone who can commu...\n",
+      "40224    Write a fairy tale about someone who can commu...\n",
+      "                               ...                        \n",
+      "40384    Write a fairy tale about someone who can commu...\n",
+      "40385    Write a fairy tale about someone who can commu...\n",
+      "40386    Write a fairy tale about someone who can commu...\n",
+      "40387    Write a fairy tale about someone who can commu...\n",
+      "40388    Write a fairy tale about someone who can commu...\n",
+      "Name: prompt, Length: 169, dtype: object\n",
+      "prompt_1168\n",
+      "69648    Write a story that features a protagonist with...\n",
+      "69649    Write a story that features a protagonist with...\n",
+      "69650    Write a story that features a protagonist with...\n",
+      "69651    Write a story that features a protagonist with...\n",
+      "69652    Write a story that features a protagonist with...\n",
+      "                               ...                        \n",
+      "69756    Write a story that features a protagonist with...\n",
+      "69757    Write a story that features a protagonist with...\n",
+      "69758    Write a story that features a protagonist with...\n",
+      "69759    Write a story that features a protagonist with...\n",
+      "69760    Write a story that features a protagonist with...\n",
+      "Name: prompt, Length: 113, dtype: object\n",
+      "prompt_1105\n",
+      "58423    Write about a character who smells something f...\n",
+      "58424    Write about a character who smells something f...\n",
+      "58425    Write about a character who smells something f...\n",
+      "58426    Write about a character who smells something f...\n",
+      "58427    Write about a character who smells something f...\n",
+      "                               ...                        \n",
+      "58639    Write about a character who smells something f...\n",
+      "58640    Write about a character who smells something f...\n",
+      "58641    Write about a character who smells something f...\n",
+      "58642    Write about a character who smells something f...\n",
+      "58643    Write about a character who smells something f...\n",
+      "Name: prompt, Length: 221, dtype: object\n",
+      "prompt_1159\n",
+      "67942    Write a story told entirely through one chase ...\n",
+      "67943    Write a story told entirely through one chase ...\n",
+      "67944    Write a story told entirely through one chase ...\n",
+      "67945    Write a story told entirely through one chase ...\n",
+      "67946    Write a story told entirely through one chase ...\n",
+      "                               ...                        \n",
+      "68083    Write a story told entirely through one chase ...\n",
+      "68084    Write a story told entirely through one chase ...\n",
+      "68085    Write a story told entirely through one chase ...\n",
+      "68086    Write a story told entirely through one chase ...\n",
+      "68087    Write a story told entirely through one chase ...\n",
+      "Name: prompt, Length: 146, dtype: object\n",
+      "prompt_1186\n",
+      "72443    Write a story about activism.\n",
+      "72444    Write a story about activism.\n",
+      "72445    Write a story about activism.\n",
+      "72446    Write a story about activism.\n",
+      "72447    Write a story about activism.\n",
+      "72448    Write a story about activism.\n",
+      "72449    Write a story about activism.\n",
+      "72450    Write a story about activism.\n",
+      "72451    Write a story about activism.\n",
+      "72452    Write a story about activism.\n",
+      "72453    Write a story about activism.\n",
+      "72454    Write a story about activism.\n",
+      "72455    Write a story about activism.\n",
+      "72456    Write a story about activism.\n",
+      "72457    Write a story about activism.\n",
+      "72458    Write a story about activism.\n",
+      "72459    Write a story about activism.\n",
+      "72460    Write a story about activism.\n",
+      "72461    Write a story about activism.\n",
+      "72462    Write a story about activism.\n",
+      "72463    Write a story about activism.\n",
+      "72464    Write a story about activism.\n",
+      "72465    Write a story about activism.\n",
+      "72466    Write a story about activism.\n",
+      "72467    Write a story about activism.\n",
+      "72468    Write a story about activism.\n",
+      "72469    Write a story about activism.\n",
+      "72470    Write a story about activism.\n",
+      "72471    Write a story about activism.\n",
+      "72472    Write a story about activism.\n",
+      "72473    Write a story about activism.\n",
+      "72474    Write a story about activism.\n",
+      "72475    Write a story about activism.\n",
+      "72476    Write a story about activism.\n",
+      "72477    Write a story about activism.\n",
+      "72478    Write a story about activism.\n",
+      "72479    Write a story about activism.\n",
+      "72480    Write a story about activism.\n",
+      "72481    Write a story about activism.\n",
+      "72482    Write a story about activism.\n",
+      "72483    Write a story about activism.\n",
+      "72484    Write a story about activism.\n",
+      "72485    Write a story about activism.\n",
+      "72486    Write a story about activism.\n",
+      "72487    Write a story about activism.\n",
+      "Name: prompt, dtype: object\n",
+      "prompt_1114\n",
+      "59814    Write a story that feels lonely, despite being...\n",
+      "59815    Write a story that feels lonely, despite being...\n",
+      "59816    Write a story that feels lonely, despite being...\n",
+      "59817    Write a story that feels lonely, despite being...\n",
+      "59818    Write a story that feels lonely, despite being...\n",
+      "                               ...                        \n",
+      "60041    Write a story that feels lonely, despite being...\n",
+      "60042    Write a story that feels lonely, despite being...\n",
+      "60043    Write a story that feels lonely, despite being...\n",
+      "60044    Write a story that feels lonely, despite being...\n",
+      "60045    Write a story that feels lonely, despite being...\n",
+      "Name: prompt, Length: 232, dtype: object\n",
+      "prompt_0701\n",
+      "18109    Write a story about a librarian that doesn’t f...\n",
+      "18110    Write a story about a librarian that doesn’t f...\n",
+      "18111    Write a story about a librarian that doesn’t f...\n",
+      "18112    Write a story about a librarian that doesn’t f...\n",
+      "18113    Write a story about a librarian that doesn’t f...\n",
+      "                               ...                        \n",
+      "18168    Write a story about a librarian that doesn’t f...\n",
+      "18169    Write a story about a librarian that doesn’t f...\n",
+      "18170    Write a story about a librarian that doesn’t f...\n",
+      "18171    Write a story about a librarian that doesn’t f...\n",
+      "18172    Write a story about a librarian that doesn’t f...\n",
+      "Name: prompt, Length: 64, dtype: object\n",
+      "prompt_1061\n",
+      "52046    Write about a character who everyone thinks is...\n",
+      "52047    Write about a character who everyone thinks is...\n",
+      "52048    Write about a character who everyone thinks is...\n",
+      "52049    Write about a character who everyone thinks is...\n",
+      "52050    Write about a character who everyone thinks is...\n",
+      "                               ...                        \n",
+      "52222    Write about a character who everyone thinks is...\n",
+      "52223    Write about a character who everyone thinks is...\n",
+      "52224    Write about a character who everyone thinks is...\n",
+      "52225    Write about a character who everyone thinks is...\n",
+      "52226    Write about a character who everyone thinks is...\n",
+      "Name: prompt, Length: 181, dtype: object\n",
+      "prompt_0925\n",
+      "32333    Write a story that involves a magic window — o...\n",
+      "32334    Write a story that involves a magic window — o...\n",
+      "32335    Write a story that involves a magic window — o...\n",
+      "32336    Write a story that involves a magic window — o...\n",
+      "32337    Write a story that involves a magic window — o...\n",
+      "                               ...                        \n",
+      "32484    Write a story that involves a magic window — o...\n",
+      "32485    Write a story that involves a magic window — o...\n",
+      "32486    Write a story that involves a magic window — o...\n",
+      "32487    Write a story that involves a magic window — o...\n",
+      "32488    Write a story that involves a magic window — o...\n",
+      "Name: prompt, Length: 156, dtype: object\n",
+      "prompt_1018\n",
+      "45878    Write about someone who keeps picking up diffe...\n",
+      "45879    Write about someone who keeps picking up diffe...\n",
+      "45880    Write about someone who keeps picking up diffe...\n",
+      "45881    Write about someone who keeps picking up diffe...\n",
+      "45882    Write about someone who keeps picking up diffe...\n",
+      "                               ...                        \n",
+      "46050    Write about someone who keeps picking up diffe...\n",
+      "46051    Write about someone who keeps picking up diffe...\n",
+      "46052    Write about someone who keeps picking up diffe...\n",
+      "46053    Write about someone who keeps picking up diffe...\n",
+      "46054    Write about someone who keeps picking up diffe...\n",
+      "Name: prompt, Length: 177, dtype: object\n",
+      "prompt_1094\n",
+      "57046    Write a story from the perspective of a bird m...\n",
+      "57047    Write a story from the perspective of a bird m...\n",
+      "57048    Write a story from the perspective of a bird m...\n",
+      "57049    Write a story from the perspective of a bird m...\n",
+      "57050    Write a story from the perspective of a bird m...\n",
+      "                               ...                        \n",
+      "57171    Write a story from the perspective of a bird m...\n",
+      "57172    Write a story from the perspective of a bird m...\n",
+      "57173    Write a story from the perspective of a bird m...\n",
+      "57174    Write a story from the perspective of a bird m...\n",
+      "57175    Write a story from the perspective of a bird m...\n",
+      "Name: prompt, Length: 130, dtype: object\n",
+      "prompt_1167\n",
+      "69390    Write a story that takes place in a waiting room.\n",
+      "69391    Write a story that takes place in a waiting room.\n",
+      "69392    Write a story that takes place in a waiting room.\n",
+      "69393    Write a story that takes place in a waiting room.\n",
+      "69394    Write a story that takes place in a waiting room.\n",
+      "                               ...                        \n",
+      "69643    Write a story that takes place in a waiting room.\n",
+      "69644    Write a story that takes place in a waiting room.\n",
+      "69645    Write a story that takes place in a waiting room.\n",
+      "69646    Write a story that takes place in a waiting room.\n",
+      "69647    Write a story that takes place in a waiting room.\n",
+      "Name: prompt, Length: 258, dtype: object\n",
+      "prompt_1017\n",
+      "45613    Write about someone who decides it’s time to c...\n",
+      "45614    Write about someone who decides it’s time to c...\n",
+      "45615    Write about someone who decides it’s time to c...\n",
+      "45616    Write about someone who decides it’s time to c...\n",
+      "45617    Write about someone who decides it’s time to c...\n",
+      "                               ...                        \n",
+      "45873    Write about someone who decides it’s time to c...\n",
+      "45874    Write about someone who decides it’s time to c...\n",
+      "45875    Write about someone who decides it’s time to c...\n",
+      "45876    Write about someone who decides it’s time to c...\n",
+      "45877    Write about someone who decides it’s time to c...\n",
+      "Name: prompt, Length: 265, dtype: object\n",
+      "prompt_1072\n",
+      "53481    Start your story with two characters watching ...\n",
+      "53482    Start your story with two characters watching ...\n",
+      "53483    Start your story with two characters watching ...\n",
+      "53484    Start your story with two characters watching ...\n",
+      "53485    Start your story with two characters watching ...\n",
+      "                               ...                        \n",
+      "53780    Start your story with two characters watching ...\n",
+      "53781    Start your story with two characters watching ...\n",
+      "53782    Start your story with two characters watching ...\n",
+      "53783    Start your story with two characters watching ...\n",
+      "53784    Start your story with two characters watching ...\n",
+      "Name: prompt, Length: 304, dtype: object\n",
+      "prompt_0796\n",
+      "22465    Write about a character who always wears a mas...\n",
+      "22466    Write about a character who always wears a mas...\n",
+      "22467    Write about a character who always wears a mas...\n",
+      "22468    Write about a character who always wears a mas...\n",
+      "22469    Write about a character who always wears a mas...\n",
+      "22470    Write about a character who always wears a mas...\n",
+      "22471    Write about a character who always wears a mas...\n",
+      "22472    Write about a character who always wears a mas...\n",
+      "22473    Write about a character who always wears a mas...\n",
+      "22474    Write about a character who always wears a mas...\n",
+      "22475    Write about a character who always wears a mas...\n",
+      "22476    Write about a character who always wears a mas...\n",
+      "22477    Write about a character who always wears a mas...\n",
+      "22478    Write about a character who always wears a mas...\n",
+      "22479    Write about a character who always wears a mas...\n",
+      "22480    Write about a character who always wears a mas...\n",
+      "22481    Write about a character who always wears a mas...\n",
+      "22482    Write about a character who always wears a mas...\n",
+      "22483    Write about a character who always wears a mas...\n",
+      "22484    Write about a character who always wears a mas...\n",
+      "22485    Write about a character who always wears a mas...\n",
+      "22486    Write about a character who always wears a mas...\n",
+      "22487    Write about a character who always wears a mas...\n",
+      "22488    Write about a character who always wears a mas...\n",
+      "22489    Write about a character who always wears a mas...\n",
+      "22490    Write about a character who always wears a mas...\n",
+      "22491    Write about a character who always wears a mas...\n",
+      "22492    Write about a character who always wears a mas...\n",
+      "22493    Write about a character who always wears a mas...\n",
+      "22494    Write about a character who always wears a mas...\n",
+      "22495    Write about a character who always wears a mas...\n",
+      "22496    Write about a character who always wears a mas...\n",
+      "22497    Write about a character who always wears a mas...\n",
+      "22498    Write about a character who always wears a mas...\n",
+      "22499    Write about a character who always wears a mas...\n",
+      "22500    Write about a character who always wears a mas...\n",
+      "22501    Write about a character who always wears a mas...\n",
+      "22502    Write about a character who always wears a mas...\n",
+      "22503    Write about a character who always wears a mas...\n",
+      "22504    Write about a character who always wears a mas...\n",
+      "22505    Write about a character who always wears a mas...\n",
+      "22506    Write about a character who always wears a mas...\n",
+      "22507    Write about a character who always wears a mas...\n",
+      "22508    Write about a character who always wears a mas...\n",
+      "22509    Write about a character who always wears a mas...\n",
+      "22510    Write about a character who always wears a mas...\n",
+      "22511    Write about a character who always wears a mas...\n",
+      "22512    Write about a character who always wears a mas...\n",
+      "22513    Write about a character who always wears a mas...\n",
+      "Name: prompt, dtype: object\n",
+      "prompt_0862\n",
+      "25026    Set your story in a roadside diner.\n",
+      "25027    Set your story in a roadside diner.\n",
+      "25028    Set your story in a roadside diner.\n",
+      "25029    Set your story in a roadside diner.\n",
+      "25030    Set your story in a roadside diner.\n",
+      "25031    Set your story in a roadside diner.\n",
+      "25032    Set your story in a roadside diner.\n",
+      "25033    Set your story in a roadside diner.\n",
+      "25034    Set your story in a roadside diner.\n",
+      "25035    Set your story in a roadside diner.\n",
+      "25036    Set your story in a roadside diner.\n",
+      "25037    Set your story in a roadside diner.\n",
+      "25038    Set your story in a roadside diner.\n",
+      "25039    Set your story in a roadside diner.\n",
+      "25040    Set your story in a roadside diner.\n",
+      "25041    Set your story in a roadside diner.\n",
+      "25042    Set your story in a roadside diner.\n",
+      "25043    Set your story in a roadside diner.\n",
+      "25044    Set your story in a roadside diner.\n",
+      "25045    Set your story in a roadside diner.\n",
+      "25046    Set your story in a roadside diner.\n",
+      "25047    Set your story in a roadside diner.\n",
+      "25048    Set your story in a roadside diner.\n",
+      "25049    Set your story in a roadside diner.\n",
+      "25050    Set your story in a roadside diner.\n",
+      "25051    Set your story in a roadside diner.\n",
+      "25052    Set your story in a roadside diner.\n",
+      "25053    Set your story in a roadside diner.\n",
+      "25054    Set your story in a roadside diner.\n",
+      "25055    Set your story in a roadside diner.\n",
+      "25056    Set your story in a roadside diner.\n",
+      "25057    Set your story in a roadside diner.\n",
+      "25058    Set your story in a roadside diner.\n",
+      "25059    Set your story in a roadside diner.\n",
+      "25060    Set your story in a roadside diner.\n",
+      "25061    Set your story in a roadside diner.\n",
+      "25062    Set your story in a roadside diner.\n",
+      "25063    Set your story in a roadside diner.\n",
+      "25064    Set your story in a roadside diner.\n",
+      "25065    Set your story in a roadside diner.\n",
+      "25066    Set your story in a roadside diner.\n",
+      "25067    Set your story in a roadside diner.\n",
+      "25068    Set your story in a roadside diner.\n",
+      "25069    Set your story in a roadside diner.\n",
+      "25070    Set your story in a roadside diner.\n",
+      "25071    Set your story in a roadside diner.\n",
+      "25072    Set your story in a roadside diner.\n",
+      "25073    Set your story in a roadside diner.\n",
+      "25074    Set your story in a roadside diner.\n",
+      "25075    Set your story in a roadside diner.\n",
+      "25076    Set your story in a roadside diner.\n",
+      "25077    Set your story in a roadside diner.\n",
+      "25078    Set your story in a roadside diner.\n",
+      "25079    Set your story in a roadside diner.\n",
+      "25080    Set your story in a roadside diner.\n",
+      "25081    Set your story in a roadside diner.\n",
+      "25082    Set your story in a roadside diner.\n",
+      "25083    Set your story in a roadside diner.\n",
+      "25084    Set your story in a roadside diner.\n",
+      "Name: prompt, dtype: object\n",
+      "prompt_1010\n",
+      "44580    Start your story with someone entering a museu...\n",
+      "44581    Start your story with someone entering a museu...\n",
+      "44582    Start your story with someone entering a museu...\n",
+      "44583    Start your story with someone entering a museu...\n",
+      "44584    Start your story with someone entering a museu...\n",
+      "44585    Start your story with someone entering a museu...\n",
+      "44586    Start your story with someone entering a museu...\n",
+      "44587    Start your story with someone entering a museu...\n",
+      "44588    Start your story with someone entering a museu...\n",
+      "44589    Start your story with someone entering a museu...\n",
+      "44590    Start your story with someone entering a museu...\n",
+      "44591    Start your story with someone entering a museu...\n",
+      "44592    Start your story with someone entering a museu...\n",
+      "44593    Start your story with someone entering a museu...\n",
+      "44594    Start your story with someone entering a museu...\n",
+      "44595    Start your story with someone entering a museu...\n",
+      "44596    Start your story with someone entering a museu...\n",
+      "44597    Start your story with someone entering a museu...\n",
+      "44598    Start your story with someone entering a museu...\n",
+      "44599    Start your story with someone entering a museu...\n",
+      "44600    Start your story with someone entering a museu...\n",
+      "44601    Start your story with someone entering a museu...\n",
+      "44602    Start your story with someone entering a museu...\n",
+      "44603    Start your story with someone entering a museu...\n",
+      "44604    Start your story with someone entering a museu...\n",
+      "44605    Start your story with someone entering a museu...\n",
+      "44606    Start your story with someone entering a museu...\n",
+      "44607    Start your story with someone entering a museu...\n",
+      "44608    Start your story with someone entering a museu...\n",
+      "44609    Start your story with someone entering a museu...\n",
+      "44610    Start your story with someone entering a museu...\n",
+      "44611    Start your story with someone entering a museu...\n",
+      "44612    Start your story with someone entering a museu...\n",
+      "44613    Start your story with someone entering a museu...\n",
+      "44614    Start your story with someone entering a museu...\n",
+      "44615    Start your story with someone entering a museu...\n",
+      "44616    Start your story with someone entering a museu...\n",
+      "44617    Start your story with someone entering a museu...\n",
+      "44618    Start your story with someone entering a museu...\n",
+      "44619    Start your story with someone entering a museu...\n",
+      "44620    Start your story with someone entering a museu...\n",
+      "Name: prompt, dtype: object\n",
+      "prompt_1069\n",
+      "52899    Start your story with the line, “This was supp...\n",
+      "52900    Start your story with the line, “This was supp...\n",
+      "52901    Start your story with the line, “This was supp...\n",
+      "52902    Start your story with the line, “This was supp...\n",
+      "52903    Start your story with the line, “This was supp...\n",
+      "                               ...                        \n",
+      "53026    Start your story with the line, “This was supp...\n",
+      "53027    Start your story with the line, “This was supp...\n",
+      "53028    Start your story with the line, “This was supp...\n",
+      "53029    Start your story with the line, “This was supp...\n",
+      "53030    Start your story with the line, “This was supp...\n",
+      "Name: prompt, Length: 132, dtype: object\n",
+      "prompt_1155\n",
+      "66626    Write a story that involves a mystery — it doe...\n",
+      "66627    Write a story that involves a mystery — it doe...\n",
+      "66628    Write a story that involves a mystery — it doe...\n",
+      "66629    Write a story that involves a mystery — it doe...\n",
+      "66630    Write a story that involves a mystery — it doe...\n",
+      "                               ...                        \n",
+      "66832    Write a story that involves a mystery — it doe...\n",
+      "66833    Write a story that involves a mystery — it doe...\n",
+      "66834    Write a story that involves a mystery — it doe...\n",
+      "66835    Write a story that involves a mystery — it doe...\n",
+      "66836    Write a story that involves a mystery — it doe...\n",
+      "Name: prompt, Length: 211, dtype: object\n",
+      "prompt_1025\n",
+      "47034    Write about someone who gets stuck in their wo...\n",
+      "47035    Write about someone who gets stuck in their wo...\n",
+      "47036    Write about someone who gets stuck in their wo...\n",
+      "47037    Write about someone who gets stuck in their wo...\n",
+      "47038    Write about someone who gets stuck in their wo...\n",
+      "                               ...                        \n",
+      "47193    Write about someone who gets stuck in their wo...\n",
+      "47194    Write about someone who gets stuck in their wo...\n",
+      "47195    Write about someone who gets stuck in their wo...\n",
+      "47196    Write about someone who gets stuck in their wo...\n",
+      "47197    Write about someone who gets stuck in their wo...\n",
+      "Name: prompt, Length: 164, dtype: object\n",
+      "prompt_1157\n",
+      "67225    Write a story that begins and ends with someon...\n",
+      "67226    Write a story that begins and ends with someon...\n",
+      "67227    Write a story that begins and ends with someon...\n",
+      "67228    Write a story that begins and ends with someon...\n",
+      "67229    Write a story that begins and ends with someon...\n",
+      "                               ...                        \n",
+      "67629    Write a story that begins and ends with someon...\n",
+      "67630    Write a story that begins and ends with someon...\n",
+      "67631    Write a story that begins and ends with someon...\n",
+      "67632    Write a story that begins and ends with someon...\n",
+      "67633    Write a story that begins and ends with someon...\n",
+      "Name: prompt, Length: 409, dtype: object\n",
+      "prompt_0992\n",
+      "41767    Write a story that spans exactly a year and ta...\n",
+      "41768    Write a story that spans exactly a year and ta...\n",
+      "41769    Write a story that spans exactly a year and ta...\n",
+      "41770    Write a story that spans exactly a year and ta...\n",
+      "41771    Write a story that spans exactly a year and ta...\n",
+      "                               ...                        \n",
+      "41974    Write a story that spans exactly a year and ta...\n",
+      "41975    Write a story that spans exactly a year and ta...\n",
+      "41976    Write a story that spans exactly a year and ta...\n",
+      "41977    Write a story that spans exactly a year and ta...\n",
+      "41978    Write a story that spans exactly a year and ta...\n",
+      "Name: prompt, Length: 212, dtype: object\n",
+      "prompt_0118\n",
+      "6222    Write a story about someone trying to reinvent...\n",
+      "6223    Write a story about someone trying to reinvent...\n",
+      "6224    Write a story about someone trying to reinvent...\n",
+      "6225    Write a story about someone trying to reinvent...\n",
+      "6226    Write a story about someone trying to reinvent...\n",
+      "6227    Write a story about someone trying to reinvent...\n",
+      "6228    Write a story about someone trying to reinvent...\n",
+      "6229    Write a story about someone trying to reinvent...\n",
+      "6230    Write a story about someone trying to reinvent...\n",
+      "6231    Write a story about someone trying to reinvent...\n",
+      "6232    Write a story about someone trying to reinvent...\n",
+      "6233    Write a story about someone trying to reinvent...\n",
+      "6234    Write a story about someone trying to reinvent...\n",
+      "6235    Write a story about someone trying to reinvent...\n",
+      "6236    Write a story about someone trying to reinvent...\n",
+      "6237    Write a story about someone trying to reinvent...\n",
+      "6238    Write a story about someone trying to reinvent...\n",
+      "6239    Write a story about someone trying to reinvent...\n",
+      "6240    Write a story about someone trying to reinvent...\n",
+      "6241    Write a story about someone trying to reinvent...\n",
+      "6242    Write a story about someone trying to reinvent...\n",
+      "6243    Write a story about someone trying to reinvent...\n",
+      "6244    Write a story about someone trying to reinvent...\n",
+      "6245    Write a story about someone trying to reinvent...\n",
+      "6246    Write a story about someone trying to reinvent...\n",
+      "6247    Write a story about someone trying to reinvent...\n",
+      "6248    Write a story about someone trying to reinvent...\n",
+      "6249    Write a story about someone trying to reinvent...\n",
+      "6250    Write a story about someone trying to reinvent...\n",
+      "6251    Write a story about someone trying to reinvent...\n",
+      "6252    Write a story about someone trying to reinvent...\n",
+      "6253    Write a story about someone trying to reinvent...\n",
+      "6254    Write a story about someone trying to reinvent...\n",
+      "6255    Write a story about someone trying to reinvent...\n",
+      "6256    Write a story about someone trying to reinvent...\n",
+      "6257    Write a story about someone trying to reinvent...\n",
+      "6258    Write a story about someone trying to reinvent...\n",
+      "6259    Write a story about someone trying to reinvent...\n",
+      "Name: prompt, dtype: object\n",
+      "prompt_1162\n",
+      "68371    Write a story about a proposal. \n",
+      "68372    Write a story about a proposal. \n",
+      "68373    Write a story about a proposal. \n",
+      "68374    Write a story about a proposal. \n",
+      "68375    Write a story about a proposal. \n",
+      "                       ...               \n",
+      "68580    Write a story about a proposal. \n",
+      "68581    Write a story about a proposal. \n",
+      "68582    Write a story about a proposal. \n",
+      "68583    Write a story about a proposal. \n",
+      "68584    Write a story about a proposal. \n",
+      "Name: prompt, Length: 214, dtype: object\n",
+      "prompt_1172\n",
+      "69960    Write about someone who has a superpower.\n",
+      "69961    Write about someone who has a superpower.\n",
+      "69962    Write about someone who has a superpower.\n",
+      "69963    Write about someone who has a superpower.\n",
+      "69964    Write about someone who has a superpower.\n",
+      "                           ...                    \n",
+      "70292    Write about someone who has a superpower.\n",
+      "70293    Write about someone who has a superpower.\n",
+      "70294    Write about someone who has a superpower.\n",
+      "70295    Write about someone who has a superpower.\n",
+      "70296    Write about someone who has a superpower.\n",
+      "Name: prompt, Length: 337, dtype: object\n",
+      "the first example of train is  prompt_id                                              prompt_1234\n",
+      "story1_id                                                   rikt93\n",
+      "story2_id                                                   qyd9jh\n",
+      "time_lag                                                  309660.0\n",
+      "least_likes                                                      8\n",
+      "chosen_text      <bos><|im_start|>user\\nWrite a story about som...\n",
+      "rejected_text    <bos><|im_start|>user\\nWrite a story about som...\n",
+      "Name: 0, dtype: object\n"
+     ]
+    }
+   ],
+   "source": [
+    "#test dataloader \n",
+    "from dataloader import StoryPairDataset\n",
+    "dataloader = StoryPairDataset(datapath,\n",
+    "                              pairpath,\n",
+    "                              tokenizer,\n",
+    "                              task='rm',\n",
+    "                              used_dataset_size=100,\n",
+    "                              train_test_split=0.1,\n",
+    "                              split_by='random',\n",
+    "                              max_len=1024*4,\n",
+    "                              mode='m2',\n",
+    "                              max_time_window=5400,\n",
+    "                              least_likes=5,\n",
+    "                              margin=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b11f608a-c2eb-42af-bfba-e801ee40e0ed",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

adapter_config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "google/gemma-2-9b",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "o_proj",
+    "q_proj",
+    "k_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a7b47235a73dadbfe04c47a58fcf387d1e01f23b7db05520e13eebbbd51b9f89
+size 286306976

dataloader.py ADDED Viewed

	@@ -0,0 +1,296 @@

+from datasets import Dataset, DatasetDict
+import pandas as pd
+import numpy as np
+import glob
+from sklearn.model_selection import train_test_split
+import re
+datapath = '/cluster/work/lawecon/Work/penghao/dataset/stories/'  # cluster-specific directory of story CSV files; keep the trailing slash, load_stories() globs path + '*.csv'
+pairpath = '../../../work/lawecon/Work/penghao/pairs.csv'  # CSV of story pairs; the path is relative to the current working directory
+# 3600 -> time lags (NOTE(review): presumably a time-lag window in the same unit as the time_lag column - confirm)
+class StoryPairDataset(Dataset):
+    def __init__(self, datapath, pairpath, tokenizer, task, used_dataset_size=-1, train_test_split=0.1,
+                 split_by='random',
+                 max_len=4096*2, mode='m3', max_time_window=3000, least_likes=5, margin=True):
+        self.datapath = datapath
+        print(self.datapath)
+        self.train_test_split = train_test_split
+        self.pairpath = pairpath
+        self.tokenizer = tokenizer
+        self.max_len = max_len
+        self.split_by = split_by
+        self.least_likes = least_likes
+        self.max_time_window = max_time_window
+        self.used_dataset_size = used_dataset_size
+        if mode == 'm2':
+            self.max_time_window = 12009600
+        else:
+            self.max_time_window = max_time_window
+        self.pair = self.load_pair()
+        self.task = task
+        self.margin = margin
+        self.stories = self.load_stories(self.datapath)
+        print(self.stories.columns)
+        print(len(self.stories))
+        # turn df into dataset
+        # self.dataset = datasets.Dataset.from_pandas(self.df)
+        self.train, self.test = self.train_test_split__()
+        self.train = self.marginInclude(self.train)
+        self.test = self.marginInclude(self.test)
+        # combine train and test to a single dataset, before train and test
+        self.dataset = self.make_dataset()
+        print('current setting mode is ', mode)
+        print('currnet setting split_by is ', split_by)
+        print('current setting least_likes is ', least_likes)
+    def load_stories(self, path):
+        stories = pd.DataFrame()
+        #print(f"Reading stories from {path}...")
+        for file in glob.glob(path + '*.csv'):
+            #print(f"Reading {file}...")
+            try:
+                # Read the CSV file into a DataFrame
+                df = pd.read_csv(file)
+                # Check if the DataFrame is empty or not
+                if df.empty:
+                    print(f"Warning: {file} is empty or not readable.")
+                    continue
+                # Concatenate the DataFrames
+                stories = pd.concat([stories, df], ignore_index=True)
+            except pd.errors.EmptyDataError:
+                # print(f"Error: {file} is empty or not readable.")
+                pass
+            except pd.errors.ParserError:
+                print(f"Error: {file} cannot be parsed.")
+            except Exception as e:
+                print(f"Error: An unexpected error occurred while processing {file}. Details: {str(e)}")
+        # contain Index(['prompt_id', 'prompt', 'story_id', 'story_title', 'story_author', 'story_url', 'link', 'genre', 'is_sensitive', 'categories', 'likes', 'story_text', 'posted_date', 'comments'], dtype='object')
+        return stories
+    def load_pair(self):
+        pair = pd.read_csv(self.pairpath)
+        # contain the colums of prompt_id, story1_id, story2_id, rel, time_lag, least_likes
+        pair = pair[pair['time_lag'] <= self.max_time_window]
+        print('the max of tima lag is ', pair['time_lag'].max())
+        pair = pair[pair['least_likes'] >= self.least_likes]
+        # swap the order of story1 and story2 if rel is negative, and makes rel positive
+        pair.loc[pair['rel'] < 0, ['story1_id', 'story2_id']] = pair.loc[
+            pair['rel'] < 0, ['story2_id', 'story1_id']].values
+        pair['rel'] = abs(pair['rel'])
+        # filter the pair if they have same story id
+        pair = pair[pair['story1_id'] != pair['story2_id']]
+        if self.used_dataset_size == -1:
+            self.used_dataset_size = len(pair)
+        else:
+            pair = pair.sample(n=self.used_dataset_size)
+        print('the total number of pairs is ', len(pair))
+        # remove the duplicate pairs
+        pair = pair.drop_duplicates(subset=['story1_id', 'story2_id'])
+        #remove the rel = 0
+        pair = pair[pair['rel'] != 0]
+        print('the number of effective pairs is ', len(pair))
+        return pair
+    def marginInclude(self, df):
+        if self.margin:
+            # drop the column of rel
+            df = df.drop(columns=['rel'])
+        else:
+            # rename rel to margin
+            df = df.rename(columns={'rel': 'margin'})
+        return df
+    def train_test_split__(self):
+        '''
+        split the pairs into train and test set
+        :return:
+        '''
+        test_size = round(len(self.pair) * self.train_test_split)
+        if self.split_by == 'time':
+            # give the pair the information of year according to the story_id
+            self.stories['posted_date'] = pd.to_datetime(self.stories['posted_date'])
+            #convert datetime64[ns] to comparable format, e.g.  2021-04-27 23:29:00 -> 20210427
+            self.stories['posted_date'] = self.stories['posted_date'].dt.strftime('%Y%m%d')
+            # the time after 2022 is test set
+            test = self.pair[self.pair['story1_id'].apply(lambda x: int(self.stories[self.stories['story_id'] == x]['posted_date'].values[0]) > 20220000)]
+            train = self.pair[self.pair['story1_id'].apply(lambda x: int(self.stories[self.stories['story_id'] == x]['posted_date'].values[0]) <= 20220000)]
+            print('the number of test set is ', len(test))
+            print('the number of train set is ', len(train))
+            print('the ratio of test set is ', len(test) / (len(test) + len(train)))
+        elif self.split_by == 'random':
+            train, test = train_test_split(self.pair, test_size=self.train_test_split)
+            # covert to huggingface dataset
+        elif self.split_by == 'genre':
+            # count the number of pairs for each category
+            # give the pair the information of category according to the story_id
+            self.pair['genre'] = self.pair['story1_id'].apply(
+                lambda x: self.stories[self.stories['story_id'] == x]['genre'].values[0])
+            genre = {}
+            for c in self.pair['genre'].unique():
+                genre[c] = len(self.pair[self.pair['genre'] == c])
+            # select the category to nearest to 10 per cent of the total
+            genre = dict(sorted(genre.items(), key=lambda item: item[1], reverse=True))#sort the genre by the number of pairs from high to low
+            print(genre)
+            total = sum(genre.values())
+            #select the close genre to 10% of the total
+            test_genre = []
+            test_count = 0
+            while test_count < total * self.train_test_split:
+                test_genre.append(list(genre.keys())[0])
+                test_count += genre[list(genre.keys())[0]]
+                del genre[list(genre.keys())[0]]
+                if test_count + genre[list(genre.keys())[0]] > total * self.train_test_split:
+                    break
+            test = self.pair[self.pair['genre'].apply(lambda x: x in test_genre)]
+            train = self.pair[self.pair['genre'].apply(lambda x: x not in test_genre)]
+            print('the genre of test set is ', test_genre)
+            print('the percentage of test set is ', test_count / total,'where total is ', total)
+        elif self.split_by == 'chaos':
+            #instead using the pairs, we randomly assign the story id to replace the old story id from that prompt
+            for i in range(len(self.pair)):
+                self.pair.at[i, 'story1_id'] = np.random.choice(self.stories[self.stories['prompt_id'] == self.pair.at[i, 'prompt_id']]['story_id'].values)
+                self.pair.at[i, 'story2_id'] = np.random.choice(self.stories[self.stories['prompt_id'] == self.pair.at[i, 'prompt_id']]['story_id'].values)
+            train, test = train_test_split(self.pair, test_size=self.train_test_split)
+        return train, test
+    def apply_template_to_text(self, row):
+        # Ensure proper access to columns in pair
+        prompt_id, story1_id, story2_id = row[['prompt_id', 'story1_id', 'story2_id']]
+        # Extract text based on IDs
+        chosen_prompt = self.stories[self.stories['prompt_id'] == prompt_id]['prompt']
+        chosen_prompt = chosen_prompt.values[0]
+        chosen_story = self.stories[self.stories['story_id'] == story1_id]['story_title'].values[0] + '/n' + \
+                       self.stories[self.stories['story_id'] == story1_id]['story_text'].values[0]
+        rejected_prompt = self.stories[self.stories['prompt_id'] == prompt_id]['prompt']
+        rejected_prompt = rejected_prompt.values[0]
+        rejected_story = self.stories[self.stories['story_id'] == story2_id]['story_title'].values[0] + '/n' + \
+                            self.stories[self.stories['story_id'] == story2_id]['story_text'].values[0]
+        # Create chosen and rejected text dictionaries
+        chosen_text = [{'role': 'user', 'content': chosen_prompt},
+                       {'role': 'assistant', 'content': chosen_story}]
+        rejected_text = [{'role': 'user', 'content': rejected_prompt},
+                         {'role': 'assistant', 'content': rejected_story}]
+        # Apply tokenizer to chosen and rejected text
+        chosen_text = self.tokenizer.apply_chat_template(chosen_text, tokenize=False)
+        rejected_text = self.tokenizer.apply_chat_template(rejected_text, tokenize=False)
+        res = {}
+        res['chosen_text'] = chosen_text
+        res['rejected_text'] = rejected_text
+        #add eos and bos token
+        res['chosen_text'] = self.tokenizer.bos_token + res['chosen_text'] + self.tokenizer.eos_token
+        res['rejected_text'] = self.tokenizer.bos_token + res['rejected_text'] + self.tokenizer.eos_token
+        res['text'] = chosen_text
+        #add eos and bos token
+        res['text'] = self.tokenizer.bos_token + res['text'] + self.tokenizer.eos_token
+        if 'gemma' in self.tokenizer.name_or_path:
+            split_words = '<|im_start|>assistant\n'
+        elif 'mistral' in self.tokenizer.name_or_path or 'llama' in self.tokenizer.name_or_path:
+            split_words = '[/INST]'
+        chosen_text_tmp = chosen_text.split(split_words)[-1]
+        prompt_text = chosen_text.replace(chosen_text_tmp, '')
+        chosen_text = chosen_text_tmp
+        rejected_text = rejected_text.split(split_words)[-1]
+        res['prompt'] = prompt_text
+        res['chosen'] = chosen_text
+        res['rejected'] = rejected_text
+        # add bos and eos token
+        res['prompt'] = self.tokenizer.bos_token + res['prompt']
+        res['chosen'] = res['chosen'] + self.tokenizer.eos_token
+        res['rejected'] = res['rejected'] + self.tokenizer.eos_token
+        return res
+    def convert_sft(self,df):
+        #collect all the story id in the pair
+        story_ids = list(set(df['story1_id'].values) | set(df['story2_id'].values))
+        #now make new train and test set as story_ids as story1_id and story2_id
+        df = pd.DataFrame()
+        df['story1_id'] = story_ids
+        df['story2_id'] = df['story1_id']
+        #reload stories
+        #self.stories = self.load_stories(self.datapath)
+        # get prompt_id from the pair
+        def get_prompt_id(x):
+            return self.stories[self.stories['story_id'] == x]['prompt_id'].values[0]
+        df['prompt_id'] = df['story1_id'].apply(lambda x: get_prompt_id(x))
+        return df
+    def make_dataset(self):
+        # reset the index
+        self.train.reset_index(drop=True, inplace=True)
+        self.test.reset_index(drop=True, inplace=True)
+        entries = []
+        if self.task == 'rm':
+            entries = ['chosen_text', 'rejected_text']
+        elif self.task == 'dpo':
+            entries = ['prompt', 'chosen', 'rejected']
+        elif self.task == 'sft':
+            self.train = self.convert_sft(self.train)
+            self.test = self.convert_sft(self.test)
+            entries = ['text']
+        print('the columns of train is ', self.train.columns)
+        for index, row in self.train.iterrows():
+            res = self.apply_template_to_text(row)
+            for e in entries:
+                self.train.at[index, e] = res[e]
+        for index, row in self.test.iterrows():
+            res = self.apply_template_to_text(row)
+            for e in entries:
+                self.test.at[index, e] = res[e]
+        print('the first example of train is ', self.train.iloc[0])
+        #since the we aggred on max_len = 8192, we need to filter this
+        if self.margin:
+            entries.append('margin')
+        train_dataset = Dataset.from_pandas(self.train[entries])
+        test_dataset = Dataset.from_pandas(self.test[entries])
+        return DatasetDict({'train': train_dataset, 'test': test_dataset})
+    def save_dataset(self, path):
+        '''
+        save the dataset to the readsy folder
+        :param path:
+        :return:
+        '''
+        self.dataset.save_to_disk('../' + path)

model/SFTmodels/gemma-2b_sftm3genre10vast/README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+library_name: peft
+base_model: model/gemma/gemma-2b/
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.11.1

model/SFTmodels/gemma-2b_sftm3genre10vast/adapter_config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "model/gemma/gemma-2b/",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": "unsloth",
+  "target_modules": [
+    "q_proj",
+    "o_proj",
+    "down_proj",
+    "gate_proj",
+    "v_proj",
+    "k_proj",
+    "up_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

model/SFTmodels/gemma-2b_sftm3genre10vast/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:80d3015564e983c4b08077e0ec998c5dc5aaac6063bd4cc6c9a32379898435b8
+size 78480072