In [1]:
!pip install -Uqq  git+https://github.com/huggingface/peft.git
!pip install -Uqq transformers datasets accelerate bitsandbytes
!pip install sentencepiece
!pip install einops

In [1]:
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, LlamaTokenizer

# Repository id of the base causal language model passed to
# AutoModelForCausalLM.from_pretrained (also used to load the tokenizer).
MODEL_ID = 'stabilityai/japanese-stablelm-base-alpha-7b'
# Repository id of the LoRA adapter that PeftModel.from_pretrained
# attaches on top of the base model.
LORA_MODEL_ID = 'tsukemono/japanese-stablelm-base-alpha-7b-qlora-marisa'



In [2]:
# Model setup: load the base model with 8-bit weights, then attach the LoRA adapter.
base_model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map='auto',
    load_in_8bit=True,
    torch_dtype=torch.float16,
    # Allows transformers to run the modeling code shipped in the model repository.
    trust_remote_code=True,
)
model = PeftModel.from_pretrained(base_model, LORA_MODEL_ID, device_map='auto')
# Switch to evaluation mode only after the adapter has been attached, so the
# adapter layers (including any LoRA dropout) are covered explicitly instead of
# relying on whatever mode PEFT leaves the wrapped model in.
# The result is assigned so the (very long) model repr is not shown as cell output.
model = model.eval()

The model weights are not tied. Please use the `tie_weights` method before using the `infer_auto_device` function.
Loading checkpoint shards: 100%|██████████| 3/3 [01:14<00:00, 24.88s/it]


In [3]:
# Tokenizer setup.
# The former `useFast=False` argument was dropped: it is a misspelling of
# `use_fast`, which is an AutoTokenizer option. LlamaTokenizer is already the
# slow (SentencePiece-based) tokenizer class, so no fast/slow switch is needed.
tokenizer = LlamaTokenizer.from_pretrained(MODEL_ID)
# Id of the last token produced for "\n". generate() passes it as an additional
# end-of-sequence id so that generation stops at the first line break.
ret_token = tokenizer("\n", add_special_tokens=False)['input_ids'][-1]

You are using the legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. This means that tokens that come after special tokens will not be properly handled. We recommend you to read the related pull request available at https://github.com/huggingface/transformers/pull/24565


In [4]:
# Definition of the text generation function
def generate(text,input=None,maxTokens=512):
    """Generate a single-line reply to ``text`` in the adapter's character voice.

    The prompt has the form ``"ユーザー: {text}\\n魔理沙: "``; sampling stops at the
    end-of-sequence token or at the first newline token (``ret_token``).

    Args:
        text: User utterance inserted into the prompt.
        input: Unused. Kept only so existing callers that pass it keep working.
        maxTokens: Forwarded to ``model.generate`` as ``max_length``, i.e. it
            bounds the prompt tokens plus the newly generated tokens together.

    Returns:
        The decoded continuation (prompt excluded, special tokens skipped)
        with all newline characters removed.
    """
    prompt = f"ユーザー: {text}\n魔理沙: "
    encoded = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        add_special_tokens=False,
    )

    # Follow the device the model was actually placed on (device_map='auto')
    # instead of hard-coding the default CUDA device.
    device = model.device
    input_ids = encoded["input_ids"].to(device)
    attention_mask = encoded["attention_mask"].to(device)

    # Some tokenizers define no pad token; fall back to EOS so that generate()
    # always receives a valid padding id.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    with torch.no_grad():
        generated = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_length=maxTokens,
            do_sample=True,
            temperature=0.1,
            top_p=0.9,
            top_k=20,
            no_repeat_ngram_size=2,
            repetition_penalty=1.15,
            pad_token_id=pad_token_id,
            # Stop on either the regular EOS token or the newline token.
            eos_token_id=[tokenizer.eos_token_id, ret_token],
        )

    # Drop the echoed prompt and decode only the newly generated tokens.
    prompt_len = input_ids.size(1)
    reply = tokenizer.decode(generated[0, prompt_len:].tolist(), skip_special_tokens=True)
    return reply.replace("\n","")

In [5]:
# Sample prompt ("What is the secret of your strength?"); the returned reply
# string is displayed as the cell output.
generate(text="強さの秘訣はなんですか?")

'そうだな。今は、この異変を解決する為に動いているからな！その過程で鍛えられたんだと思うぜ。'

In [6]:
# Sample prompt ("What do you think about the Buddha's philosophy?"); the
# returned reply string is displayed as the cell output.
generate(text="ブッダの思想についてどう思う?")

'仏教は、人間の煩悩を否定している。しかし、私は人間だ！だから、私の欲望も肯定するべきなんだぜ！'

In [7]:
# Sample prompt ("What is the highest mountain in Japan?"); the returned reply
# string is displayed as the cell output.
generate(text="日本で一番高い山は?")

'富士山だ。'

In [10]:
# Sample prompt ("What kind of place is Gensokyo?"); the returned reply string
# is displayed as the cell output.
generate(text="幻想郷ってどんな場所?")

'そうだな。まず、妖怪がたくさんいるな！あと人間も結構多いぜ。'