--- base_model: llm-jp/llm-jp-3-13b tags: - text-generation-inference - transformers - unsloth - llama - trl license: cc-by-nc-sa-4.0 language: - en --- # Uploaded model - **Developed by:** CLRafaelR - **License:** apache-2.0 - **Finetuned from model :** llm-jp/llm-jp-3-13b This llama model was trained 2x faster with [Unsloth](https://github.com/unslothai/unsloth) and Huggingface's TRL library. [](https://github.com/unslothai/unsloth) # ライセンス cc-by-nc-sa # 実行方法 ## 必要パッケージのロード ```python get_ipython().system("pip install torch==2.2.1+cu121 torchvision --index-url https://download.pytorch.org/whl/cu121") get_ipython().system( 'pip install --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"' ) get_ipython().system('pip install --no-deps "xformers<0.0.26" --force-reinstall') get_ipython().system('pip install flash-attn==2.6.3') get_ipython().system("pip install schedulefree") get_ipython().system("pip install ipywidgets --upgrade") get_ipython().system("pip install langchain langchain-community langchain-huggingface faiss-cpu jq polars") from unsloth import FastLanguageModel from peft import PeftModel import torch import json from tqdm import tqdm import re import gc import datetime from transformers.trainer_utils import set_seed from datasets import load_dataset import os import getpass from langchain_huggingface.embeddings import HuggingFaceEmbeddings import polars as pl from langchain_community.document_loaders import HuggingFaceDatasetLoader from langchain_community.vectorstores import FAISS from pprint import pprint from typing import List from langchain_core.documents import Document from langchain_core.runnables import chain import time from transformers import TextStreamer if not os.environ.get("HF_TOKEN"): os.environ["HF_TOKEN"] = getpass.getpass( "Enter your Hugging Face API key: ", ) HF_TOKEN = os.environ["HF_TOKEN"] def flush(): gc.collect() torch.cuda.empty_cache() torch.cuda.reset_peak_memory_stats() set_seed(2024) ``` ## 主機モデルのロード ```python model_id = "llm-jp/llm-jp-3-13b" adapter_id = "CLRafaelR/llm-jp-3-13b-ogawa-brewery" dtype = None # Noneにしておけば自動で設定 load_in_4bit = True # 今回は13Bモデルを扱うためTrue model, tokenizer = FastLanguageModel.from_pretrained( model_name=model_id, dtype=dtype, load_in_4bit=load_in_4bit, trust_remote_code=True, ) model = PeftModel.from_pretrained(model, adapter_id, token=HF_TOKEN) ``` ## RAG構築 下記を参考に作成しました llm-book/chapter13/13-3-2-rag-instruct-langchain.ipynb at main · ghmagazine/llm-book https://github.com/ghmagazine/llm-book/blob/main/chapter13/13-3-2-rag-instruct-langchain.ipynb ### 埋め込みモデルのロード ```python # Hugging Face Hubにおけるモデル名を指定 embedding_model_name = "pkshatech/GLuCoSE-base-ja-v2" # モデル名からEmbedding Modelを初期化 embedding_model = HuggingFaceEmbeddings( model_name=embedding_model_name, model_kwargs={ "model_kwargs": { "torch_dtype": torch.float16, # "device": "cuda", } }, encode_kwargs={"normalize_embeddings": False}, ) ``` ### ベクトルデータベースの構築 ```python data_name = "elyza/ELYZA-tasks-100" ELYZA_tasks_100 = load_dataset(data_name)["test"] loader = HuggingFaceDatasetLoader( data_name, "input", ) documents = loader.load() vectorstore = FAISS.from_documents( documents, embedding_model, normalize_L2=True, ) @chain def retriever( query: str, k: int = 4, score_threshold=0.8, ) -> List[Document]: docs, scores = zip( *vectorstore.similarity_search_with_relevance_scores( query, k=k, kwargs={ "score_threshold": score_threshold, }, ) ) filtered_docs = [] for doc, score in zip(docs, scores): if score > score_threshold: doc.metadata["score"] = score print(round(score, 3)) filtered_docs.append(doc) return filtered_docs retrieved_documents = retriever.invoke( "IMEとして機能してください", # k=1, score_threshold=0.45, ) ``` ## 評価用データセットの読み込み ```python datasets = [] with open("../confidential/data/elyza-tasks-100-TV_0.jsonl", "r") as f: item = "" for line in f: line = line.strip() item += line if item.endswith("}"): datasets.append(json.loads(item)) item = "" ``` ## 推論 ```python # 推論するためにモデルのモードを変更 FastLanguageModel.for_inference(model) streamer = TextStreamer( tokenizer, skip_prompt=True, skip_special_tokens=True, ) results = [] start_time = time.time() for dt in tqdm(datasets): input = dt["input"] raw_shots = retriever.invoke( input, k=1, score_threshold=0.45, ) if not raw_shots: # ELYZA-tasks-100(オリジナル版)に、いま解こうとしている問題の類似問題がなかった場合 prompt_inst_answer = f"""### 指示\n\n{input}\n\n### 回答\n\n""" # prompt_inst_answer = f"""### 指示\n\n下のタスクへの最終回答に必要な<思考過程>を順序だてて考え、3つの番号付き箇条書きだけで出力してください。\n\nその後で、<最終回答>を出力してください。\n\n### タスク\n\n{input}\n\n### 回答\n\n<思考過程><最終回答>に必要な思考過程3点です。\n\n1. """ else: # ELYZA-tasks-100(オリジナル版)に、いま解こうとしている問題の類似問題があった場合 shots = [] for i, raw_shot in enumerate(raw_shots): shot = f"""### タスク{i + 1}\n\n{raw_shot.page_content.encode().decode('unicode-escape')}\n\n### タスク{i + 1}の回答\n\n{raw_shot.metadata['output']}""" shots.append(shot) formatted_shots = "\n\n".join(shots) num_shots = len(shots) # print(formatted_shots, "\n\n", "=" * 10) prompt_inst_answer = f"""### 指示\n\n{input}\n\n### 回答\n\n""" # prompt_inst_answer = f"""### 指示\n\n以下の類似したタスクを解いてください。\n\n{formatted_shots}\n\n### タスク{num_shots + 1}\n\n{input}\n\n### タスク{num_shots + 1}の回答\n\n先に解いたタスクと同じ方法で、順序立てて考えます。""" print( "=" * 16, "\n\n", prompt_inst_answer, ) inputs = tokenizer( [prompt_inst_answer], return_tensors="pt", ).to(model.device) outputs = model.generate( **inputs, max_new_tokens=512, use_cache=True, do_sample=False, # do_sample=True, # num_beams=5, repetition_penalty=1.2, streamer=streamer, ) prediction = tokenizer.decode( outputs[0][inputs.input_ids.shape[-1] :], skip_special_tokens=True, ) results.append( { "task_id": dt["task_id"], "input": input, "output": prediction, } ) flush() print("-" * 16) end_time = time.time() elapsed_time = datetime.timedelta(seconds=end_time - start_time) print(f"{elapsed_time} elapsed.") ``` ## jsonlファイルとして実行結果を保存 ```python file_name = f"./{adapter_id.split('/')[1]}_output_{datetime.datetime.now(datetime.timezone(datetime.timedelta(hours=9))).strftime('%Y%m%d_%H%M')}" with open( f"{file_name}.jsonl", "w", encoding="utf-8", ) as f: for result in results: json.dump(result, f, ensure_ascii=False) f.write("\n") # polarsデータフレームを作成 df = pl.DataFrame(results) # データフレームをxlsxファイルとして出力 df.write_excel(f"{file_name}.xlsx") ```