{ "cells": [ { "cell_type": "code", "execution_count": null, "id": "a41f141c-b6a8-40d1-b72d-127d028c0592", "metadata": {}, "outputs": [], "source": [ "import os\n", "from transformers import AutoModelForCausalLM, AutoTokenizer\n", "\n", "model_path = os.getcwd()\n", "print(model_path)\n", "tokenizer = AutoTokenizer.from_pretrained(model_path, legacy=False)\n", "model = AutoModelForCausalLM.from_pretrained(model_path, use_safetensors=True, local_files_only=True)" ] }, { "cell_type": "code", "execution_count": null, "id": "93e9ec6a-4a57-484f-a1a5-ecb6674e8f77", "metadata": {}, "outputs": [], "source": [ "#inputs = tokenizer('', return_tensors=\"pt\")\n", "#outputs = model.generate(inputs['input_ids'], max_new_tokens=20, temperature=0)\n", "#print(tokenizer.decode(outputs[0], skip_special_tokens=True))" ] }, { "cell_type": "code", "execution_count": null, "id": "e570b6db-efa8-4c9f-ac71-573479b00711", "metadata": {}, "outputs": [], "source": [ "model.gradient_checkpointing_enable()" ] }, { "cell_type": "code", "execution_count": null, "id": "9345e74b-5bef-4cc9-982e-342af69b290a", "metadata": {}, "outputs": [], "source": [ "from peft import LoraConfig\n", "\n", "config = LoraConfig(\n", " r=32,\n", " lora_alpha=64,\n", " target_modules=[\n", " \"q_proj\",\n", " \"k_proj\",\n", " \"v_proj\",\n", " \"o_proj\",\n", " \"w1\",\n", " \"w2\",\n", " \"w3\",\n", " \"lm_head\",\n", " ],\n", " bias=\"none\",\n", " lora_dropout=0.05, # Conventional\n", " task_type=\"CAUSAL_LM\",\n", ")\n", "\n", "#print(model)" ] }, { "cell_type": "code", "execution_count": null, "id": "b43aec47-5fa4-48c9-8e57-9c6b233b9c7e", "metadata": {}, "outputs": [], "source": [ "def split_and_trim(text):\n", " paragraphs = text.strip().split('\\n\\n')\n", " trimmed_paragraphs = []\n", " for para in paragraphs:\n", " trimmed_lines = [line.lstrip() for line in para.split('\\n')]\n", " trimmed_paragraphs.append('\\n'.join(trimmed_lines))\n", "\n", " return trimmed_paragraphs\n", "\n", "with open(\"data.txt\", \"r\") as f:\n", " content = f.read()\n", " dataset = split_and_trim(content)\n", " tokenized_train_dataset = [\n", " tokenizer(content)['input_ids'] for content in dataset\n", " ]\n", "#tokenized_train_dataset" ] }, { "cell_type": "code", "execution_count": null, "id": "09dd4848-9c7a-4a3b-9887-59652c915cc3", "metadata": {}, "outputs": [], "source": [ "import transformers\n", "from datetime import datetime\n", "\n", "project = \"moe_shakespeare15M\"\n", "run_name = project\n", "output_dir = \"./\" + run_name\n", "\n", "checkpointing_args = {\"use_reentrant\": False}\n", "trainer = transformers.Trainer(\n", " model=model,\n", " train_dataset=tokenized_train_dataset,\n", " args=transformers.TrainingArguments(\n", " output_dir=output_dir,\n", " warmup_steps=10,\n", " per_device_train_batch_size=2,\n", " gradient_accumulation_steps=1,\n", " gradient_checkpointing=True,\n", " max_steps=3000,\n", " learning_rate=2.5e-5, # Want a small lr for finetuning\n", " # fp16=True, \n", " optim=\"adamw_torch\",\n", " # logging_steps=25, # When to start reporting loss\n", " # logging_dir=\"./logs\", # Directory for storing logs\n", " save_strategy=\"steps\", # Save the model checkpoint every logging step\n", " save_steps=50, # Save checkpoints every 50 steps\n", " logging_steps=100,\n", " save_total_limit=4,\n", " # evaluation_strategy=\"steps\", # Evaluate the model every logging step\n", " # eval_steps=25, # Evaluate and save checkpoints every 50 steps\n", " # do_eval=True, # Perform evaluation at the end of training\n", " report_to=\"none\", # Comment this out if you don't want to use weights & baises\n", " run_name=f\"{run_name}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}\" # Name of the W&B run (optional)\n", " ),\n", " data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),\n", ")\n", "\n", "model.config.use_cache = False # silence the warnings. Please re-enable for inference!\n", "trainer.train()" ] }, { "cell_type": "code", "execution_count": null, "id": "7f0ad783-3f3e-4812-bc4e-026f9aad1435", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.12" } }, "nbformat": 4, "nbformat_minor": 5 }