---
base_model: llm-jp/llm-jp-3-13b
tags:
- text-generation-inference
- transformers
- unsloth
- llama
- trl
license: apache-2.0
language:
- en
---

# Uploaded model

- **Developed by:** HBD007
- **License:** apache-2.0
- **Finetuned from model:** llm-jp/llm-jp-3-13b

This llama model was trained 2x faster with [Unsloth](https://github.com/unslothai/unsloth) and Hugging Face's TRL library.

# Inference code

```python
# Import required libraries
import json
from datetime import datetime
from pathlib import Path

import jsonlines
import torch
from huggingface_hub import get_token
from peft import PeftModel
from tqdm.notebook import tqdm
from transformers import pipeline
from transformers.pipelines.text_generation import TextGenerationPipeline
from unsloth import FastLanguageModel

# Constants
HF_TOKEN = get_token()
PROMPT_TEMPLATE = "\n".join(
    [
        "### 指示",
        "{}",
        "### 回答",
        "{}",
    ]
)

# Settings
base_model_id = "llm-jp/llm-jp-3-13b"  # Base model to use
adapter_id = "HBD007/llm-jp-3-13b-LLM2024-lora"  # LoRA adapter
input_data_path = Path("data/elyza-tasks-100-TV_0.jsonl")  # Path to the input data
output_file_path = Path(
    f"inference_results-{datetime.now().strftime('%Y%m%d-%H%M%S')}.jsonl"
)  # Path for the output file

# Load the input data
datasets_list = [obj for obj in jsonlines.open(input_data_path)]

# Load the model and tokenizer
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=base_model_id,
    trust_remote_code=True,
    token=HF_TOKEN,
)

# Load the LoRA adapter
model = PeftModel.from_pretrained(
    model,
    adapter_id,
    token=HF_TOKEN,
)

# Switch the model to inference mode
model = FastLanguageModel.for_inference(model)

# Create the text-generation pipeline
generator: TextGenerationPipeline = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    use_cache=True,
    do_sample=False,
    repetition_penalty=1.2,
)

# Print GPU status
if torch.cuda.is_available():
    gpu_stats = torch.cuda.get_device_properties(0)
    start_gpu_memory = round(torch.cuda.max_memory_reserved() / (1024**3), 3)
    max_memory = round(gpu_stats.total_memory / (1024**3), 3)
    print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
    print(f"{start_gpu_memory} GB of memory reserved.")

# Generate the results
results = []
for dt in tqdm(datasets_list):
    input_text = dt["input"]
    task_id = dt["task_id"]

    generated = generator(
        text_inputs=PROMPT_TEMPLATE.format(input_text, ""),
        return_full_text=False,
    )

    results.append(
        {
            "task_id": task_id,
            "input": input_text,
            "output": generated[0]["generated_text"],
        }
    )

# Save the results
with output_file_path.open("w", encoding="utf-8") as f:
    for result in results:
        json.dump(result, f, ensure_ascii=False)
        f.write("\n")

print(f"Inference results saved to {output_file_path}")
```
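
# Fine-tuning sketch

The adapter was trained with Unsloth and TRL as noted above, but the training data and hyperparameters are not published in this card. The snippet below is only a minimal sketch of what a LoRA fine-tuning run in that style could look like, assuming a hypothetical JSONL instruction dataset (`train.jsonl` with `input`/`output` fields) and placeholder hyperparameters; argument names may also differ slightly between TRL versions. It is illustrative, not the actual training script.

```python
# Minimal LoRA fine-tuning sketch with Unsloth + TRL (illustrative only).
# The dataset path, prompt format, and hyperparameters are assumptions,
# not the settings actually used to train this adapter.
from datasets import load_dataset
from transformers import TrainingArguments
from trl import SFTTrainer
from unsloth import FastLanguageModel

max_seq_length = 1024

# Load the base model in 4-bit to keep memory usage low
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="llm-jp/llm-jp-3-13b",
    max_seq_length=max_seq_length,
    load_in_4bit=True,
)

# Attach LoRA adapters to the attention and MLP projections
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    lora_alpha=32,
    lora_dropout=0.0,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    bias="none",
    use_gradient_checkpointing="unsloth",
)

# Hypothetical instruction dataset with "input"/"output" fields, formatted
# with the same prompt template that the inference script uses
PROMPT_TEMPLATE = "### 指示\n{}\n### 回答\n{}"

def format_example(example):
    text = PROMPT_TEMPLATE.format(example["input"], example["output"]) + tokenizer.eos_token
    return {"text": text}

dataset = load_dataset("json", data_files="train.jsonl", split="train")
dataset = dataset.map(format_example)

# Supervised fine-tuning on the formatted "text" field
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    args=TrainingArguments(
        output_dir="outputs",
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        num_train_epochs=1,
        learning_rate=2e-4,
        logging_steps=10,
        fp16=True,
    ),
)

trainer.train()

# Push only the LoRA adapter weights to the Hub
model.push_to_hub("HBD007/llm-jp-3-13b-LLM2024-lora")
```

Reusing the inference-time prompt template during training keeps the adapter's input formatting consistent between training and generation.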