# Uploaded model

- **Developed by:** satami
- **License:** apache-2.0
- **Finetuned from model:** llm-jp/llm-jp-3-13b

This Llama-architecture model was trained 2x faster with Unsloth and Hugging Face's TRL library.
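The card only states that the adapter was trained with Unsloth and TRL. As a reference, the sketch below shows what such a QLoRA fine-tune typically looks like; the dataset file, text field, LoRA target modules, and hyperparameters are illustrative assumptions, not the actual recipe (only `r=16` is hinted at by the output file name used later).

```python
# Minimal sketch of an Unsloth + TRL LoRA fine-tune (assumed setup, not the
# exact recipe used for this adapter). Dataset path, text field, and most
# hyperparameters are placeholders.
from unsloth import FastLanguageModel
from datasets import load_dataset
from trl import SFTTrainer
from transformers import TrainingArguments

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="llm-jp/llm-jp-3-13b",
    max_seq_length=1024,
    load_in_4bit=True,
)
model = FastLanguageModel.get_peft_model(
    model,
    r=16,  # LoRA rank (assumption based on the output file name)
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    lora_alpha=16,
    lora_dropout=0.0,
)

# Hypothetical training file with pre-formatted prompt/response strings.
train_dataset = load_dataset("json", data_files="train.jsonl", split="train")

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    dataset_text_field="text",  # assumes a single pre-formatted text column
    max_seq_length=1024,
    args=TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        num_train_epochs=1,
        learning_rate=2e-4,
        output_dir="outputs",
    ),
)
trainer.train()
model.save_pretrained("llm-jp-3-13b_LoRA_with_policy_v2")  # saves the adapter
```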
## How to generate outputs
```python
from unsloth import FastLanguageModel
from peft import PeftModel
from tqdm import tqdm
import torch
import json

# Load the base model
max_seq_length = 1024
dtype = None          # auto-detect (float16 / bfloat16)
load_in_4bit = True
model_id = "llm-jp/llm-jp-3-13b"
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_id,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
    trust_remote_code=True,
)

# Load the LoRA adapter
adapter_id = "satami/llm-jp-3-13b_LoRA_with_policy_v2"
HF_TOKEN = "your_token"  # your Hugging Face access token
model = PeftModel.from_pretrained(model, adapter_id, token=HF_TOKEN)

# Load the dataset (JSONL; a single record may span several lines)
datasets = []
with open("../../dataset/elyza-tasks-100-TV_0.jsonl", "r") as f:
    item = ""
    for line in f:
        line = line.strip()
        item += line
        if item.endswith("}"):
            datasets.append(json.loads(item))
            item = ""

# Run the tasks
FastLanguageModel.for_inference(model)
results = []
for dt in tqdm(datasets):
    input_text = dt["input"]
    # The prompt asks the model to briefly state what it will output,
    # then write a response that satisfies the request.
    prompt = f"""以下は、タスクを説明する指示です。自分が何を出力するかを簡潔に答えた後、要求を適切に満たす応答を書きなさい。
### 指示:{input_text}
### タスク:"""
    inputs = tokenizer([prompt], return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=1024,
        use_cache=True,
        do_sample=False,
        repetition_penalty=1.2,
    )
    prediction = tokenizer.decode(outputs[0], skip_special_tokens=True).split("\n### タスク:")[-1]
    results.append({"task_id": dt["task_id"], "input": input_text, "output": prediction})

# The model outputs a task summary followed by the response,
# so strip the task summary from the output.
for i in range(len(results)):
    results[i]["output"] = results[i]["output"].split("### 応答:")[-1]

# Save as JSONL
new_model_id = "ichikara-elyza_r=16_with-policy"
with open(f"{new_model_id}_output.jsonl", "w", encoding="utf-8") as f:
    for result in results:
        json.dump(result, f, ensure_ascii=False)
        f.write("\n")
```
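Each line of the saved file is one JSON record with `task_id`, `input`, and `output` keys. A quick way to sanity-check the result (the file name below matches the script above):

```python
# Read the saved JSONL back and inspect the first record.
import json

with open("ichikara-elyza_r=16_with-policy_output.jsonl", encoding="utf-8") as f:
    first = json.loads(f.readline())
print(sorted(first))  # ['input', 'output', 'task_id']
```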