Uploaded model

  • Developed by: okaba815
  • License: apache-2.0
  • Finetuned from model : llm-jp/llm-jp-3-13b

This llama model was trained 2x faster with Unsloth and Huggingface's TRL library.

モデルのuploadまで


%%capture   #結果を非表示にするセルマジックコマンド
!pip uninstall unsloth -y
!pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

%%capture   #結果を非表示にするセルマジックコマンド
!pip install --upgrade torch
!pip install --upgrade xformers

%%capture   #結果を非表示にするセルマジックコマンド
!pip install ipywidgets --upgrade

import torch
if torch.cuda.get_device_capability()[0] >= 8:
    !pip install --no-deps packaging ninja einops "flash-attn>=2.6.3"

HF_TOKEN = "your token"


from unsloth import FastLanguageModel
import torch
max_seq_length = 512 # unslothではRoPEをサポートしているのでコンテキスト長は自由に設定可能
dtype = None # Noneにしておけば自動で設定
load_in_4bit = True # 今回は13Bモデルを扱うためTrue

model_id = "llm-jp/llm-jp-3-13b"
new_model_id = "llm-jp-3-13b-it" #Fine-Tuningしたモデルにつけたい名前、it: Instruction Tuning

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_id,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
    trust_remote_code=True,
)

model = FastLanguageModel.get_peft_model(
    model,
    r = 32,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 32,
    lora_dropout = 0.05,
    bias = "none",
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
    use_rslora = False,
    loftq_config = None,
    max_seq_length = max_seq_length,
)

from datasets import load_dataset

dataset = load_dataset("json", data_files="./ichikara-instruction-003-001-1.json")

prompt = """### 指示
{}

{}"""


"""
formatting_prompts_func: 各データをプロンプトに合わせた形式に合わせる
"""
EOS_TOKEN = tokenizer.eos_token # トークナイザーのEOSトークン(文末トークン)
def formatting_prompts_func(examples):
    input = examples["text"] # 入力データ
    output = examples["output"] # 出力データ
    text = prompt.format(input, output) + EOS_TOKEN # プロンプトの作成
    return { "formatted_text" : text, } # 新しいフィールド "formatted_text" を返す
pass

dataset = dataset.map(
    formatting_prompts_func,
    num_proc= 4, # 並列処理数を指定
)

dataset

print(dataset["train"]["formatted_text"][3])

"""
training_arguments: 学習の設定

  - output_dir:
      -トレーニング後のモデルを保存するディレクトリ

  - per_device_train_batch_size:
      - デバイスごとのトレーニングバッチサイズ

  - per_device_eval_batch_size:
      - デバイスごとの評価バッチサイズ

  - gradient_accumulation_steps:
      - 勾配を更新する前にステップを積み重ねる回数

  - optim:
      - オプティマイザの設定

  - num_train_epochs:
      - エポック数

  - eval_strategy:
      - 評価の戦略 ("no"/"steps"/"epoch")

  - eval_steps:
      - eval_strategyが"steps"のとき、評価を行うstep間隔

  - logging_strategy:
      - ログ記録の戦略

  - logging_steps:
      - ログを出力するステップ間隔

  - warmup_steps:
      - 学習率のウォームアップステップ数

  - save_steps:
      - モデルを保存するステップ間隔

  - save_total_limit:
      - 保存しておくcheckpointの数

  - max_steps:
      - トレーニングの最大ステップ数

  - learning_rate:
      - 学習率

  - fp16:
      - 16bit浮動小数点の使用設定(第8回演習を参考にすると良いです)

  - bf16:
      - BFloat16の使用設定

  - group_by_length:
      -  入力シーケンスの長さによりバッチをグループ化 (トレーニングの効率化)

  - report_to:
      - ログの送信先 ("wandb"/"tensorboard"など)
"""
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset=dataset["train"],
    max_seq_length = max_seq_length,
    dataset_text_field="formatted_text",
    packing = False,
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        num_train_epochs = 1,
        logging_steps = 10,
        warmup_steps = 10,
        save_steps=100,
        save_total_limit=2,
        max_steps=-1,
        learning_rate = 2e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        group_by_length=True,
        seed = 3407,
        output_dir = "outputs",
        report_to = "none",
    ),
)

#@title 現在のメモリ使用量を表示
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

#@title 学習実行
trainer_stats = trainer.train()


import json
datasets = []
with open("./elyza-tasks-100-TV_0.jsonl", "r") as f:
    item = ""
    for line in f:
      line = line.strip()
      item += line
      if item.endswith("}"):
        datasets.append(json.loads(item))
        item = ""

from tqdm import tqdm

FastLanguageModel.for_inference(model)

results = []
for dt in tqdm(datasets):
  input = dt["input"]

  prompt = f"""### 指示\n{input}\n### 回答\n"""

  inputs = tokenizer([prompt], return_tensors = "pt").to(model.device)

  outputs = model.generate(**inputs, max_new_tokens = 512, use_cache = True, do_sample=False, repetition_penalty=1.2)
  prediction = tokenizer.decode(outputs[0], skip_special_tokens=True).split('\n### 回答')[-1]

  results.append({"task_id": dt["task_id"], "input": input, "output": prediction})

with open(f"{new_model_id}_output.jsonl", 'w', encoding='utf-8') as f:
    for result in results:
        json.dump(result, f, ensure_ascii=False)
        f.write('\n')

HF_TOKEN = "your token"

model.push_to_hub_merged(
    new_model_id+"_lora",
    tokenizer=tokenizer,
    save_method="lora",
    token=HF_TOKEN,
    private=True
)

保存したモデルの使い方

%%capture
!pip install unsloth
!pip uninstall unsloth -y && pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install -U torch
!pip install -U peft

from unsloth import FastLanguageModel
from peft import PeftModel
import torch
import json
from tqdm import tqdm
import re

model_id = "llm-jp/llm-jp-3-13b"
adapter_id = "okaba815/llm-jp-3-13b-it_lora"

HF_TOKEN = "your token"

dtype = None # Noneにしておけば自動で設定
load_in_4bit = True # 今回は13Bモデルを扱うためTrue

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_id,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
    trust_remote_code=True,
)

model = PeftModel.from_pretrained(model, adapter_id, token = HF_TOKEN)


datasets = []
with open("./elyza-tasks-100-TV_0.jsonl", "r") as f:
    item = ""
    for line in f:
      line = line.strip()
      item += line
      if item.endswith("}"):
        datasets.append(json.loads(item))
        item = ""


FastLanguageModel.for_inference(model)

results = []
for dt in tqdm(datasets):
  input = dt["input"]

  prompt = f"""### 指示\n{input}\n### 回答\n"""

  inputs = tokenizer([prompt], return_tensors = "pt").to(model.device)

  outputs = model.generate(**inputs, max_new_tokens = 512, use_cache = True, do_sample=False, repetition_penalty=1.2)
  prediction = tokenizer.decode(outputs[0], skip_special_tokens=True).split('\n### 回答')[-1]

  results.append({"task_id": dt["task_id"], "input": input, "output": prediction})


json_file_id = re.sub(".*/", "", adapter_id)
with open(f"/content/{json_file_id}_output.jsonl", 'w', encoding='utf-8') as f:
    for result in results:
        json.dump(result, f, ensure_ascii=False)
        f.write('\n')
Downloads last month

-

Downloads are not tracked for this model. How to track
Inference Providers NEW
This model is not currently available via any of the supported Inference Providers.
The model cannot be deployed to the HF Inference API: The model has no pipeline_tag.

Model tree for okaba815/llm-jp-3-13b-it_lora

Finetuned
(1119)
this model