{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {},
      "outputs": [],
      "source": [
        "!pip install -Uqq  git+https://github.com/huggingface/peft.git\n",
        "!pip install -Uqq transformers datasets accelerate bitsandbytes\n",
        "!pip install sentencepiece\n",
        "!pip install einops"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {},
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "\n",
            "===================================BUG REPORT===================================\n",
            "Welcome to bitsandbytes. For bug reports, please run\n",
            "\n",
            "python -m bitsandbytes\n",
            "\n",
            " and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues\n",
            "================================================================================\n",
            "bin C:\\Users\\horiy\\AppData\\Roaming\\Python\\Python39\\site-packages\\bitsandbytes\\libbitsandbytes_cuda116.dll\n",
            "CUDA SETUP: CUDA runtime path found: C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.6\\bin\\cudart64_110.dll\n",
            "CUDA SETUP: Highest compute capability among GPUs detected: 8.6\n",
            "CUDA SETUP: Detected CUDA version 116\n",
            "CUDA SETUP: Loading binary C:\\Users\\horiy\\AppData\\Roaming\\Python\\Python39\\site-packages\\bitsandbytes\\libbitsandbytes_cuda116.dll...\n"
          ]
        }
      ],
      "source": [
        "import torch\n",
        "from peft import PeftModel\n",
        "from transformers import AutoModelForCausalLM, LlamaTokenizer\n",
        "\n",
        "MODEL_ID = \"stabilityai/japanese-stablelm-base-alpha-7b\"\n",
        "LORA_MODEL_ID = \"tsukemono/japanese-stablelm-base-alpha-7b-qlora-marisa\","
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 2,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "VOmiOziuEr6N",
        "outputId": "678b317e-f235-43f2-e443-1df05bd20253"
      },
      "outputs": [
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "The model weights are not tied. Please use the `tie_weights` method before using the `infer_auto_device` function.\n",
            "Loading checkpoint shards: 100%|██████████| 3/3 [01:14<00:00, 24.88s/it]\n"
          ]
        }
      ],
      "source": [
        "# model設定\n",
        "model = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map='auto', load_in_8bit=True, torch_dtype=torch.float16, trust_remote_code=True)\n",
        "model.eval()\n",
        "model = PeftModel.from_pretrained(model, LORA_MODEL_ID, device_map='auto')"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 3,
      "metadata": {},
      "outputs": [
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "You are using the legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. This means that tokens that come after special tokens will not be properly handled. We recommend you to read the related pull request available at https://github.com/huggingface/transformers/pull/24565\n"
          ]
        }
      ],
      "source": [
        "# tokenizer設定\n",
        "tokenizer = LlamaTokenizer.from_pretrained(MODEL_ID, useFast=False)\n",
        "ret_token = tokenizer(\"\\n\",  truncation=True, add_special_tokens=False)['input_ids'][-1]"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 4,
      "metadata": {},
      "outputs": [],
      "source": [
        "# テキスト生成関数の定義\n",
        "def generate(text,input=None,maxTokens=512):\n",
        "    prompt = f\"ユーザー: {text}\\n魔理沙: \"\n",
        "    input_ids = tokenizer(prompt, \n",
        "        return_tensors=\"pt\", \n",
        "        truncation=True, \n",
        "        add_special_tokens=False\n",
        "    ).input_ids.cuda()\n",
        "    with torch.no_grad():\n",
        "        outputs = model.generate(\n",
        "            input_ids = input_ids,\n",
        "            max_length=maxTokens,\n",
        "            # max_new_tokens=50,\n",
        "            do_sample=True,\n",
        "            temperature=0.1,\n",
        "            top_p=0.9, \n",
        "            top_k=20,\n",
        "            no_repeat_ngram_size=2,\n",
        "            repetition_penalty=1.15,\n",
        "            pad_token_id=tokenizer.pad_token_id,\n",
        "            # bad_words_ids=[[186]], # 改行記号\n",
        "            eos_token_id = [tokenizer.eos_token_id,ret_token]\n",
        "        )\n",
        "    outputs = tokenizer.decode(outputs.tolist()[0][input_ids.size(1):],skip_special_tokens=True)\n",
        "    return outputs.replace(\"\\n\",\"\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 5,
      "metadata": {},
      "outputs": [
        {
          "data": {
            "text/plain": [
              "'そうだな。今は、この異変を解決する為に動いているからな！その過程で鍛えられたんだと思うぜ。'"
            ]
          },
          "execution_count": 5,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "generate(\"強さの秘訣はなんですか?\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 6,
      "metadata": {},
      "outputs": [
        {
          "data": {
            "text/plain": [
              "'仏教は、人間の煩悩を否定している。しかし、私は人間だ！だから、私の欲望も肯定するべきなんだぜ！'"
            ]
          },
          "execution_count": 6,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "generate(\"ブッダの思想についてどう思う?\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 7,
      "metadata": {},
      "outputs": [
        {
          "data": {
            "text/plain": [
              "'富士山だ。'"
            ]
          },
          "execution_count": 7,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "generate(\"日本で一番高い山は?\")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 10,
      "metadata": {},
      "outputs": [
        {
          "data": {
            "text/plain": [
              "'そうだな。まず、妖怪がたくさんいるな！あと人間も結構多いぜ。'"
            ]
          },
          "execution_count": 10,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "generate(\"幻想郷ってどんな場所?\")"
      ]
    }
  ],
  "metadata": {
    "accelerator": "GPU",
    "colab": {
      "gpuType": "T4",
      "provenance": []
    },
    "gpuClass": "standard",
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.9.5"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}