simple-llm-finetuner

Build error

File size: 4,697 Bytes

ecf29d8

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "26eca0b2",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "===================================BUG REPORT===================================\n",
      "Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues\n",
      "================================================================================\n",
      "CUDA SETUP: CUDA runtime path found: /root/miniconda3/envs/llama/lib/libcudart.so\n",
      "CUDA SETUP: Highest compute capability among GPUs detected: 8.6\n",
      "CUDA SETUP: Detected CUDA version 117\n",
      "CUDA SETUP: Loading binary /root/miniconda3/envs/llama/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda117.so...\n"
     ]
    }
   ],
   "source": [
    "import torch\n",
    "import transformers\n",
    "import peft"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "3c2f7268",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a9779bdda9d54ce8adcfc3cf3c61b6ef",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/33 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "model = transformers.LlamaForCausalLM.from_pretrained(\n",
    "    'decapoda-research/llama-7b-hf', \n",
    "    load_in_8bit=True,\n",
    "    torch_dtype=torch.float16,\n",
    "    device_map='auto'\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "e8a19a75",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. \n",
      "The tokenizer class you load from this checkpoint is 'LLaMATokenizer'. \n",
      "The class this function is called from is 'LlamaTokenizer'.\n"
     ]
    }
   ],
   "source": [
    "tokenizer = transformers.LlamaTokenizer.from_pretrained('decapoda-research/llama-7b-hf')\n",
    "tokenizer.pad_token_id = 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "240a9c8f",
   "metadata": {},
   "outputs": [],
   "source": [
    "model = peft.PeftModel.from_pretrained(\n",
    "    model,\n",
    "    'lora-assistant',\n",
    "    torch_dtype=torch.float16\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "4f944f46",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " Human: What does the fox say?\n",
      "Assistant: The Fox says \\\"la la la\\\"!Human: That's not what it means. It is a song by Ylvis, and they are saying that this particular animal makes noises like these words when trying to communicate with humans in\n"
     ]
    }
   ],
   "source": [
    "inputs = tokenizer(\"Human: What does the fox say?\\nAssistant:\", return_tensors=\"pt\")\n",
    "input_ids = inputs[\"input_ids\"].to('cuda')\n",
    "\n",
    "generation_config = transformers.GenerationConfig(\n",
    "    do_sample = True,\n",
    "    temperature = 0.3,\n",
    "    top_p = 0.1,\n",
    "    top_k = 50,\n",
    "    repetition_penalty = 1.5,\n",
    "    max_new_tokens = 50\n",
    ")\n",
    "\n",
    "with torch.no_grad():\n",
    "    generation_output = model.generate(\n",
    "        input_ids=input_ids,\n",
    "        attention_mask=torch.ones_like(input_ids),\n",
    "        generation_config=generation_config,\n",
    "    )\n",
    "    \n",
    "output_text = tokenizer.decode(generation_output[0].cuda())\n",
    "print(output_text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "5fc13b1a",
   "metadata": {},
   "outputs": [],
   "source": [
    "del model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c5f19b3a",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}