qgyd2021 committed
Commit a974b8f
1 parent: 18dfcaf

[update] add code
.gitignore ADDED
@@ -0,0 +1,10 @@
+
+ .git/
+ .idea/
+
+ cache/
+ flagged/
+ gradio_cached_examples/
+ hub_datasets/
+
+ **/__pycache__/
README.md CHANGED
@@ -1,11 +1,11 @@
 ---
-title: Reward Model Gpt2 Stack Exchange
+title: Reward Model GPT2 Stack Exchange
 emoji: 📚
 colorFrom: yellow
 colorTo: red
 sdk: gradio
-sdk_version: 3.45.2
-app_file: app.py
+sdk_version: 3.38.0
+app_file: main.py
 pinned: false
 ---
 
examples/reward_model/reward_model_gpt2_stack_exchange/1.prepare_data.py ADDED
@@ -0,0 +1,45 @@
+ #!/usr/bin/python3
+ # -*- coding: utf-8 -*-
+ import argparse
+
+ from datasets import load_dataset
+
+ from project_settings import project_path
+
+
+ def get_args():
+     parser = argparse.ArgumentParser()
+
+     parser.add_argument("--dataset_path", default="lvwerra/stack-exchange-paired", type=str)
+     parser.add_argument(
+         "--dataset_cache_dir",
+         default=(project_path / "hub_datasets").as_posix(),
+         type=str
+     )
+     args = parser.parse_args()
+     return args
+
+
+ def main():
+     args = get_args()
+
+     train_dataset = load_dataset(
+         path=args.dataset_path,
+         data_dir="data/reward",
+         split="train",
+         cache_dir=args.dataset_cache_dir
+     )
+     eval_dataset = load_dataset(
+         path=args.dataset_path,
+         data_dir="data/evaluation",
+         split="train",
+         cache_dir=args.dataset_cache_dir
+     )
+     print(train_dataset)
+     print(eval_dataset)
+
+     return
+
+
+ if __name__ == '__main__':
+     main()
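
For orientation: each example in both splits carries `question`, `response_j` (the answer Stack Exchange voters preferred) and `response_k` (the rejected one), which is the schema the training script below relies on. A minimal sketch to inspect one pair once the cache is populated (the 120-character truncation is an arbitrary choice for display):

    #!/usr/bin/python3
    # -*- coding: utf-8 -*-
    # Peek at one pairwise example; assumes 1.prepare_data.py has already
    # downloaded the dataset into the hub_datasets cache used above.
    from datasets import load_dataset

    dataset = load_dataset(
        path="lvwerra/stack-exchange-paired",
        data_dir="data/reward",
        split="train",
        cache_dir="hub_datasets",
    )
    sample = dataset[0]
    for key in ("question", "response_j", "response_k"):
        # response_j is the answer preferred by Stack Exchange voters.
        print(key, "->", sample[key][:120].replace("\n", " "))
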
examples/reward_model/reward_model_gpt2_stack_exchange/2.train_model.py ADDED
@@ -0,0 +1,340 @@
+ #!/usr/bin/python3
+ # -*- coding: utf-8 -*-
+ """
+ reference:
+ https://github.com/huggingface/trl
+
+ https://huggingface.co/docs/trl/main/en/reward_trainer
+ https://huggingface.co/docs/trl/index
+ https://huggingface.co/blog/trl-peft
+
+ https://medium.com/towards-generative-ai/reward-model-training-2209d1befb5f
+
+ https://github.com/huggingface/trl/blob/main/examples/research_projects/stack_llama/scripts/reward_modeling.py
+
+
+ # Tensorboard View
+ tensorboard \
+     --logdir=file_dir/serialization_dir/runs/Sep22_09-36-16_nlp \
+     --port=8008 \
+     --bind_all
+
+ http://10.75.27.247:8008/
+ """
+ import argparse
+ from dataclasses import dataclass, field
+ import os
+ from typing import Any, Dict, List, Optional, Union
+
+ import evaluate
+ import numpy as np
+ import torch
+ import torch.nn as nn
+ from datasets import load_dataset
+ from peft import LoraConfig, TaskType, get_peft_model
+ from transformers import (
+     AutoModelForSequenceClassification,
+     AutoTokenizer,
+     HfArgumentParser,
+     PreTrainedTokenizerBase,
+     Trainer,
+     TrainerCallback,
+     TrainingArguments,
+ )
+ from transformers.utils import PaddingStrategy
+ from transformers.models.gpt2.modeling_gpt2 import GPT2ForSequenceClassification
+ from transformers.trainer_utils import EvalPrediction, IntervalStrategy
+
+ from project_settings import project_path
+
+
+ @dataclass
+ class ScriptArguments:
+     # dataset
+     dataset_path: str = field(default="lvwerra/stack-exchange-paired")
+     dataset_cache_dir: str = field(default=(project_path / "hub_datasets").as_posix())
+     train_subset: Optional[int] = field(default=-1)
+     eval_subset: Optional[int] = field(default=10000)
+
+     # cache
+     cache_dir: str = field(default="cache_dir")
+
+     # model
+     model_name: Optional[str] = field(default="gpt2")
+     num_labels: Optional[int] = field(default=1)
+     last_checkpoint: Optional[str] = field(default="last_checkpoint")
+
+     # tokenizer
+     tokenizer_name: Optional[str] = field(default=None)
+
+     # dataset process
+     max_length: Optional[int] = field(default=512)
+
+     # lora
+     lora_rank: int = field(default=64)
+     lora_alpha: int = field(default=32)
+     lora_dropout: float = field(default=0.05)
+
+     # training_args
+     output_dir: Optional[str] = field(default="output_dir")
+     evaluation_strategy: Union[IntervalStrategy, str] = field(default="steps")
+     per_device_train_batch_size: Optional[int] = field(default=4)
+     per_device_eval_batch_size: Optional[int] = field(default=1)
+     gradient_accumulation_steps: Optional[int] = field(default=1)
+     learning_rate: Optional[float] = field(default=2e-5)
+     weight_decay: Optional[float] = field(default=0.001)
+     num_train_epochs: float = field(default=1.0)
+     lr_scheduler_type: Optional[str] = field(default="linear")
+     logging_strategy: Union[IntervalStrategy, str] = field(default="steps")
+     save_strategy: Union[IntervalStrategy, str] = field(default="steps")
+     logging_steps: float = field(default=500)
+     bf16: bool = field(default=False)
+     fp16: bool = field(default=False)
+     local_rank: Optional[int] = field(default=-1, metadata={"help": "Used for multi-gpu"})
+     eval_steps: Optional[float] = field(default=5000)
+     save_steps: float = field(default=500)
+     save_total_limit: Optional[int] = field(default=5)
+     remove_unused_columns: Optional[bool] = field(default=False)
+     label_names: Optional[List[str]] = field(default=None)
+     deepspeed: Optional[str] = field(default=None)
+     optim: Optional[str] = field(default="adamw_hf")
+     report_to: Optional[List[str]] = field(default=None)
+     resume_from_checkpoint: Optional[bool] = field(default=False)
+     gradient_checkpointing: Optional[bool] = field(default=False)
+
+     # addition
+     eval_first_step: Optional[bool] = field(
+         default=False,
+         metadata={"help": "Whether to run eval after the first step"},
+     )
+
+
+ def get_args():
+     parser = HfArgumentParser(ScriptArguments)
+     args = parser.parse_args_into_dataclasses()[0]
+     return args
+
+
+ @dataclass
+ class RewardDataCollatorWithPadding:
+     tokenizer: PreTrainedTokenizerBase
+     padding: Union[bool, str, PaddingStrategy] = PaddingStrategy.MAX_LENGTH
+     max_length: Optional[int] = None
+     pad_to_multiple_of: Optional[int] = None
+     return_tensors: str = "pt"
+
+     def __call__(self, features: List[Dict[str, Any]]) -> Dict[str, Any]:
+         features_j = []
+         features_k = []
+         for feature in features:
+             features_j.append({
+                 "input_ids": feature["input_ids_j"],
+                 "attention_mask": feature["attention_mask_j"],
+             })
+             features_k.append({
+                 "input_ids": feature["input_ids_k"],
+                 "attention_mask": feature["attention_mask_k"],
+             })
+         batch_j = self.tokenizer.pad(
+             features_j,
+             padding=self.padding,
+             max_length=self.max_length,
+             pad_to_multiple_of=self.pad_to_multiple_of,
+             return_tensors=self.return_tensors,
+         )
+         batch_k = self.tokenizer.pad(
+             features_k,
+             padding=self.padding,
+             max_length=self.max_length,
+             pad_to_multiple_of=self.pad_to_multiple_of,
+             return_tensors=self.return_tensors,
+         )
+         batch = {
+             "input_ids_j": batch_j["input_ids"],
+             "attention_mask_j": batch_j["attention_mask"],
+             "input_ids_k": batch_k["input_ids"],
+             "attention_mask_k": batch_k["attention_mask"],
+             "return_loss": True,
+         }
+         return batch
+
+
+ class RewardTrainer(Trainer):
+     # Define how to compute the reward loss.
+     # We use the InstructGPT pairwise logloss: https://arxiv.org/abs/2203.02155
+     def compute_loss(self, model, inputs, return_outputs=False):
+         rewards_j = model(input_ids=inputs["input_ids_j"], attention_mask=inputs["attention_mask_j"])[0]
+         rewards_k = model(input_ids=inputs["input_ids_k"], attention_mask=inputs["attention_mask_k"])[0]
+         loss = -nn.functional.logsigmoid(rewards_j - rewards_k).mean()
+         if return_outputs:
+             return loss, {"rewards_j": rewards_j, "rewards_k": rewards_k}
+         return loss
+
+
+ class EvaluateFirstStepCallback(TrainerCallback):
+     def on_step_end(self, args, state, control, **kwargs):
+         if state.global_step == 1:
+             control.should_evaluate = True
+
+
+ def main():
+     args = get_args()
+
+     # dataset
+     train_dataset = load_dataset(
+         path=args.dataset_path,
+         data_dir="data/reward",
+         split="train",
+         cache_dir=args.dataset_cache_dir
+     )
+     if args.train_subset > 0:
+         train_dataset = train_dataset.select(range(args.train_subset))
+     eval_dataset = load_dataset(
+         path=args.dataset_path,
+         data_dir="data/evaluation",
+         split="train",
+         cache_dir=args.dataset_cache_dir
+     )
+     if args.eval_subset > 0:
+         eval_dataset = eval_dataset.select(range(args.eval_subset))
+
+     # training_args
+     training_args = TrainingArguments(
+         output_dir=args.output_dir,
+         evaluation_strategy=args.evaluation_strategy,
+         per_device_train_batch_size=args.per_device_train_batch_size,
+         per_device_eval_batch_size=args.per_device_eval_batch_size,
+         gradient_accumulation_steps=args.gradient_accumulation_steps,
+         learning_rate=args.learning_rate,
+         weight_decay=args.weight_decay,
+         num_train_epochs=args.num_train_epochs,
+         lr_scheduler_type=args.lr_scheduler_type,
+         logging_strategy=args.logging_strategy,
+         logging_steps=args.logging_steps,
+         save_strategy=args.save_strategy,
+         bf16=args.bf16,
+         fp16=args.fp16,
+         local_rank=args.local_rank,
+         eval_steps=args.eval_steps,
+         save_steps=args.save_steps,
+         save_total_limit=args.save_total_limit,
+         remove_unused_columns=args.remove_unused_columns,
+         label_names=list() if args.label_names is None else args.label_names,
+         deepspeed=args.deepspeed,
+         optim=args.optim,
+         report_to=args.report_to,
+         resume_from_checkpoint=args.resume_from_checkpoint,
+         gradient_checkpointing=args.gradient_checkpointing,
+     )
+
+     # tokenizer
+     tokenizer_name = args.tokenizer_name if args.tokenizer_name is not None else args.model_name
+     tokenizer = AutoTokenizer.from_pretrained(tokenizer_name, use_auth_token=True)
+
+     # model
+     model = AutoModelForSequenceClassification.from_pretrained(
+         args.model_name,
+         num_labels=args.num_labels,
+     )
+
+     peft_config = LoraConfig(
+         task_type=TaskType.SEQ_CLS,
+         inference_mode=False,
+         r=args.lora_rank,
+         lora_alpha=args.lora_alpha,
+         lora_dropout=args.lora_dropout,
+     )
+
+     model = get_peft_model(model, peft_config)
+     model.print_trainable_parameters()
+
+     # Need to do this for gpt2, because it doesn't have an official pad token.
+     tokenizer.pad_token = tokenizer.eos_token
+     model.config.pad_token_id = tokenizer.eos_token_id
+     model.config.use_cache = not args.gradient_checkpointing
+     original_columns = train_dataset.column_names
+
+     # Turn the dataset into pairs of question + answers,
+     # where text_j is the preferred question + answer and text_k is the other.
+     # Then tokenize the dataset.
+     def preprocess_function(examples):
+         new_examples = {
+             "input_ids_j": [],
+             "attention_mask_j": [],
+             "input_ids_k": [],
+             "attention_mask_k": [],
+         }
+         for question, response_j, response_k in zip(examples["question"], examples["response_j"], examples["response_k"]):
+             tokenized_j = tokenizer("Question: " + question + "\n\nAnswer: " + response_j,
+                                     max_length=args.max_length, truncation=True)
+             tokenized_k = tokenizer("Question: " + question + "\n\nAnswer: " + response_k,
+                                     max_length=args.max_length, truncation=True)
+
+             new_examples["input_ids_j"].append(tokenized_j["input_ids"])
+             new_examples["attention_mask_j"].append(tokenized_j["attention_mask"])
+             new_examples["input_ids_k"].append(tokenized_k["input_ids"])
+             new_examples["attention_mask_k"].append(tokenized_k["attention_mask"])
+
+         return new_examples
+
+     # preprocess the dataset and filter out QAs that are longer than script_args.max_length
+     train_dataset = train_dataset.map(
+         preprocess_function,
+         batched=True,
+         num_proc=os.cpu_count() // 2,
+         remove_columns=original_columns,
+         cache_file_name=os.path.join(args.cache_dir, 'train.cache')
+     )
+     train_dataset = train_dataset.filter(
+         lambda x: len(x["input_ids_j"]) <= args.max_length and len(x["input_ids_k"]) <= args.max_length,
+         num_proc=os.cpu_count() // 2,
+     )
+
+     eval_dataset = eval_dataset.map(
+         preprocess_function,
+         batched=True,
+         num_proc=os.cpu_count() // 2,
+         remove_columns=original_columns,
+         cache_file_name=os.path.join(args.cache_dir, 'eval.cache')
+     )
+     eval_dataset = eval_dataset.filter(
+         lambda x: len(x["input_ids_j"]) <= args.max_length and len(x["input_ids_k"]) <= args.max_length,
+         num_proc=os.cpu_count() // 2,
+     )
+
+     # Define the metric that we'll use for validation.
+     accuracy = evaluate.load("accuracy")
+
+     def compute_metrics(eval_pred: EvalPrediction) -> Dict[str, Any]:
+         predictions, _ = eval_pred
+         # Here, predictions is rewards_j and rewards_k.
+         # We want to see how much of the time rewards_j > rewards_k.
+         predictions = np.argmax(predictions, axis=0)
+         labels = np.zeros(predictions.shape)
+         return accuracy.compute(predictions=predictions, references=labels)
+
+     # Train the model, woohoo.
+     trainer = RewardTrainer(
+         model=model,
+         args=training_args,
+         train_dataset=train_dataset,
+         eval_dataset=eval_dataset,
+         compute_metrics=compute_metrics,
+         data_collator=RewardDataCollatorWithPadding(tokenizer=tokenizer,
+                                                     padding="max_length",
+                                                     max_length=args.max_length),
+     )
+
+     if args.eval_first_step:
+         trainer.add_callback(EvaluateFirstStepCallback())
+
+     trainer.train(args.resume_from_checkpoint)
+
+     print("Saving last checkpoint of the model")
+     last_checkpoint = os.path.join(args.output_dir, args.last_checkpoint)
+     model.save_pretrained(last_checkpoint)
+     return


+ if __name__ == '__main__':
+     main()
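
`RewardTrainer.compute_loss` above is the InstructGPT pairwise ranking loss, loss = -log(sigmoid(r_j - r_k)): it falls toward zero as the preferred answer's reward pulls ahead of the rejected one's, while `compute_metrics` simply counts how often r_j > r_k. A self-contained toy sketch of both quantities (the reward values are invented for illustration; in training they come from the model's classification head):

    #!/usr/bin/python3
    # -*- coding: utf-8 -*-
    # Toy illustration of the pairwise logloss in RewardTrainer.compute_loss.
    import torch
    import torch.nn as nn

    rewards_j = torch.tensor([[2.0], [0.5], [-1.0]])  # rewards for the preferred answers
    rewards_k = torch.tensor([[1.0], [0.7], [-3.0]])  # rewards for the rejected answers

    loss = -nn.functional.logsigmoid(rewards_j - rewards_k).mean()
    accuracy = (rewards_j > rewards_k).float().mean()  # what compute_metrics measures

    print(loss)      # ~0.41: the mis-ranked second pair inflates the loss
    print(accuracy)  # 2 of 3 pairs ranked correctly -> ~0.6667
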
examples/reward_model/reward_model_gpt2_stack_exchange/3.merge_lora.py ADDED
@@ -0,0 +1,70 @@
+ #!/usr/bin/python3
+ # -*- coding: utf-8 -*-
+ import argparse
+
+ from peft import PeftModel
+ from transformers import AutoConfig, AutoModelForSequenceClassification, AutoTokenizer
+
+ """
+ Use this script to merge the LoRA weights into the base model.
+ """
+
+
+ def get_args():
+     """
+     python3 3.merge_lora.py \
+         --pretrained_model_name_or_path /data/tianxing/PycharmProjects/Transformers/pretrained_models/huggingface/gpt2 \
+         --adapter_name_or_path /data/tianxing/PycharmProjects/Transformers/examples/reward_model/reward_model_gpt2_stack/file_dir/serialization_dir/last_checkpoint \
+         --save_directory /data/tianxing/PycharmProjects/Transformers/trained_models/reward_model_gpt2_stack
+
+     """
+     parser = argparse.ArgumentParser()
+
+     parser.add_argument(
+         "--pretrained_model_name_or_path",
+         default="YeungNLP/firefly-chatglm2-6b",
+         type=str
+     )
+     parser.add_argument(
+         "--adapter_name_or_path",
+         default="YeungNLP/firefly-baichuan-7b-qlora-sft",
+         type=str
+     )
+     parser.add_argument("--save_directory", default="save_directory", type=str)
+
+     parser.add_argument("--num_labels", default=1, type=int)
+
+     args = parser.parse_args()
+     return args
+
+
+ def main():
+     args = get_args()
+
+     config = AutoConfig.from_pretrained(
+         args.pretrained_model_name_or_path,
+         trust_remote_code=True,
+     )
+     tokenizer = AutoTokenizer.from_pretrained(
+         args.pretrained_model_name_or_path,
+         trust_remote_code=True,
+         # llama does not support the fast tokenizer
+         use_fast=False if config.model_type == 'llama' else True
+     )
+
+     model = AutoModelForSequenceClassification.from_pretrained(
+         args.pretrained_model_name_or_path,
+         num_labels=args.num_labels,
+     )
+
+     model = PeftModel.from_pretrained(model, args.adapter_name_or_path, device_map={"": "cpu"})
+     model = model.merge_and_unload()
+
+     tokenizer.save_pretrained(args.save_directory)
+     model.save_pretrained(args.save_directory)
+     return
+
+
+ if __name__ == '__main__':
+     main()
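
Once `merge_and_unload()` has folded the LoRA deltas into the base weights, the checkpoint in `--save_directory` loads with plain `transformers` and no `peft` dependency. A quick sanity-check sketch ("save_directory" is just the placeholder default from above):

    #!/usr/bin/python3
    # -*- coding: utf-8 -*-
    # Sanity check: the merged checkpoint should load without peft and keep
    # the single-logit reward head; no adapter parameters remain after merging.
    from transformers import AutoModelForSequenceClassification, AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("save_directory")
    model = AutoModelForSequenceClassification.from_pretrained(
        "save_directory",
        num_labels=1,
    )
    print(model.config.num_labels)  # 1: a scalar reward per sequence
    print(model.num_parameters())
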
examples/reward_model/reward_model_gpt2_stack_exchange/4.test_model.py ADDED
@@ -0,0 +1,92 @@
+ #!/usr/bin/python3
+ # -*- coding: utf-8 -*-
+ import argparse
+ import os
+ import sys
+
+ pwd = os.path.abspath(os.path.dirname(__file__))
+ sys.path.append(os.path.join(pwd, '../../../'))
+
+ from project_settings import project_path
+
+ hf_hub_cache = (project_path / "cache/huggingface/hub").as_posix()
+
+ os.environ["HUGGINGFACE_HUB_CACHE"] = hf_hub_cache
+
+ import torch
+ import torch.nn as nn
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
+
+
+ def get_args():
+     """
+     python3 4.test_model.py --pretrained_model_name_or_path /data/tianxing/PycharmProjects/Transformers/trained_models/reward_model_gpt2_stack
+
+     """
+     parser = argparse.ArgumentParser()
+     parser.add_argument(
+         "--pretrained_model_name_or_path",
+         default=(project_path / "trained_models/reward_model_gpt2_stack").as_posix(),
+         type=str
+     )
+     parser.add_argument("--question", default="I know the question has been asked thousands of times, but I'll ask it again: is there a way (even patchy) to write/read a dumb text file with Javascript or Protoype ? This is only for debug purposes, and is not designed for production. The thing is I need it to work with (at least) both Firefox and IE (preferably under Windows). Thanks in advance !", type=str)
+     parser.add_argument(
+         "--response_j",
+         default="**It *is* possible to read/write to a local file via JavaScript**: take a look at [TiddlyWIki](http://www.tiddlywiki.com/). *(Caveat: only works for local documents.)* I have actually written a [Single Page Application](http://softwareas.com/towards-a-single-page-application-framework) (SPA) using [twFile](http://jquery.tiddlywiki.org/twFile.html), a part of the TiddlyWiki codebase: 1. Works in different browsers: (IE, Firefox, Chrome) 2. This code is a little old now. TiddlyWiki abandoned the jQuery plugin design a while ago. (Look at the [current TiddlyWiki filesystem.js](http://dev.tiddlywiki.org/browser/Trunk/core/js/FileSystem.js) for more a more recent implementation. It's not isolated for you like the twFile plug-in, though). 3. Although written as a jQuery plug-in, I've studied the code and it is almost completely decoupled from jQuery. **Update:** I have uploaded a [proof-of-concept](http://coolcases.com/jeopardy/) that accesses a local file via JavaScript. * Modifying this application to write to a file is trivial. * I have not tried to get this to work as a file served from a web server, but it should be possible since there are [server-side implementations of TiddlyWiki](http://tiddlywiki.org/wiki/Can_I_use_TiddlyWiki_as_a_multi-user/collaborative/server_based_wiki%3F)<>. **Update:** So it looks like the server side implementations of TiddlyWiki use a server \"adapter\" to modify a file stored on the server, similar to [Peter's description](https://stackoverflow.com/questions/3195720/write-a-file-with-prototype-or-plain-javascript/3195752#3195752). The pure JavaScript method will probably not work if the page is served from a web server due to cross-domain security limitations.",
+         type=str
+     )
+     parser.add_argument(
+         "--response_k",
+         default="Javascript in browsers doesn't allow you to write local files, for **security reasons**. This **may change with time**, but as for now you have to **deal with it**.",
+         type=str
+     )
+     parser.add_argument('--max_length', default=512, type=int)
+
+     parser.add_argument('--device', default="cuda" if torch.cuda.is_available() else "cpu", type=str)
+
+     args = parser.parse_args()
+     return args
+
+
+ def main():
+     args = get_args()
+
+     tokenizer = AutoTokenizer.from_pretrained(args.pretrained_model_name_or_path)
+     model = AutoModelForSequenceClassification.from_pretrained(
+         args.pretrained_model_name_or_path,
+         num_labels=1,
+     )
+     model.eval()
+     # honor the --device argument (cuda when available, otherwise cpu)
+     model = model.to(args.device)
+
+     tokenizer.pad_token = tokenizer.eos_token
+     model.config.pad_token_id = tokenizer.eos_token_id
+
+     text_j = "Question: {}\n\nAnswer: {}".format(args.question, args.response_j)
+     text_k = "Question: {}\n\nAnswer: {}".format(args.question, args.response_k)
+
+     text_encoded = tokenizer(
+         [text_j, text_k],
+         padding="longest",
+         max_length=args.max_length,
+         truncation=True
+     )
+
+     input_ids = torch.tensor(text_encoded["input_ids"], dtype=torch.long).to(args.device)
+     attention_mask = torch.tensor(text_encoded["attention_mask"], dtype=torch.long).to(args.device)
+
+     outputs = model(input_ids=input_ids, attention_mask=attention_mask)
+     pooled_logits = outputs[0]
+     pooled_logits = pooled_logits.cpu().detach()
+     score = nn.functional.sigmoid(pooled_logits)
+
+     print(score.shape)
+     print(score)
+
+     return
+
+
+ if __name__ == '__main__':
+     main()
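
The two printed sigmoid scores are independent per-sequence values. Since the pairwise loss only constrains reward *differences*, a more meaningful readout is the implied preference probability sigmoid(r_j - r_k); a small sketch (the raw logits below are invented for illustration):

    #!/usr/bin/python3
    # -*- coding: utf-8 -*-
    # Convert two raw reward logits into the preference probability implied by
    # the pairwise training objective. The logit values are made-up examples.
    import torch

    pooled_logits = torch.tensor([[1.8], [-0.4]])  # [r_j], [r_k] from the model

    preference_j_over_k = torch.sigmoid(pooled_logits[0] - pooled_logits[1])
    print(preference_j_over_k)  # ~0.90: response_j preferred with ~90% confidence
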
examples/reward_model/reward_model_gpt2_stack_exchange/run.sh ADDED
@@ -0,0 +1,139 @@
+ #!/usr/bin/env bash
+
+ # sh run.sh --stage 0 --stop_stage 0 --system_version centos
+ # sh run.sh --stage 1 --stop_stage 1 --system_version centos
+ # sh run.sh --stage 2 --stop_stage 2 --system_version centos
+ # sh run.sh --stage 3 --stop_stage 3 --system_version centos
+
+ # bitsandbytes
+ export LD_LIBRARY_PATH="/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
+
+ # params
+ system_version="windows";
+ verbose=true;
+ stage=0  # start from 0 if you need to start from data preparation
+ stop_stage=5
+
+ pretrained_model_supplier=
+ pretrained_model_name=gpt2
+
+ last_checkpoint_dir=last_checkpoint
+ final_model_name=reward_model_gpt2_stack
+
+ # parse options
+ while true; do
+   [ -z "${1:-}" ] && break;  # break if there are no arguments
+   case "$1" in
+     --*) name=$(echo "$1" | sed s/^--// | sed s/-/_/g);
+       eval '[ -z "${'"$name"'+xxx}" ]' && echo "$0: invalid option $1" 1>&2 && exit 1;
+       old_value="$(eval echo \$$name)";
+       if [ "${old_value}" == "true" ] || [ "${old_value}" == "false" ]; then
+         was_bool=true;
+       else
+         was_bool=false;
+       fi
+
+       # Set the variable to the right value-- the escaped quotes make it work if
+       # the option had spaces, like --cmd "queue.pl -sync y"
+       eval "${name}=\"$2\"";
+
+       # Check that Boolean-valued arguments are really Boolean.
+       if $was_bool && [[ "$2" != "true" && "$2" != "false" ]]; then
+         echo "$0: expected \"true\" or \"false\": $1 $2" 1>&2
+         exit 1;
+       fi
+       shift 2;
+       ;;
+
+     *) break;
+   esac
+ done
+
+ $verbose && echo "system_version: ${system_version}"
+
+ work_dir="$(pwd)"
+ file_dir="${work_dir}/file_dir"
+ cache_dir="${file_dir}/cache_dir"
+ serialization_dir="${file_dir}/serialization_dir"
+
+ pretrained_models_dir="${work_dir}/../../../pretrained_models/huggingface/${pretrained_model_supplier}"
+ final_model_dir="${work_dir}/../../../trained_models/${final_model_name}";
+
+ mkdir -p "${file_dir}"
+ mkdir -p "${cache_dir}"
+ mkdir -p "${serialization_dir}"
+ mkdir -p "${pretrained_models_dir}"
+ mkdir -p "${final_model_dir}"
+
+ export PYTHONPATH="${work_dir}/../../.."
+
+ if [ $system_version == "windows" ]; then
+   alias python3='C:/Users/tianx/PycharmProjects/virtualenv/Transformers/Scripts/python.exe'
+ elif [ $system_version == "centos" ]; then
+   # conda activate Transformers
+   alias python3='/usr/local/miniconda3/envs/Transformers/bin/python3'
+ elif [ $system_version == "ubuntu" ]; then
+   alias python3='/usr/local/miniconda3/envs/Transformers/bin/python3'
+ elif [ $system_version == "macos" ]; then
+   alias python3='/Users/honey/PycharmProjects/virtualenv/TrainLLM/bin/python'
+ fi
+
+
+ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
+   $verbose && echo "stage 0: download pretrained model"
+   cd "${pretrained_models_dir}" || exit 1;
+
+   if [ ! -d "${pretrained_model_name}" ]; then
+     git clone "https://huggingface.co/${pretrained_model_supplier:+$pretrained_model_supplier/}${pretrained_model_name}/"
+
+     cd "${pretrained_model_name}" || exit 1;
+
+     rm -rf onnx/
+     rm -rf .git
+     rm -rf .gitattributes
+     rm -rf 64-8bits.tflite
+     rm -rf 64-fp16.tflite
+     rm -rf 64.tflite
+     rm -rf flax_model.msgpack
+     rm -rf model.safetensors
+     rm -rf rust_model.ot
+     rm -rf tf_model.h5
+
+   fi
+
+ fi
+
+
+ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
+   $verbose && echo "stage 1: prepare data"
+   cd "${work_dir}" || exit 1;
+
+   python3 1.prepare_data.py
+
+ fi
+
+
+ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
+   $verbose && echo "stage 2: train model"
+   cd "${work_dir}" || exit 1;
+
+   python3 2.train_model.py \
+     --cache_dir "${cache_dir}" \
+     --model_name "${pretrained_models_dir}/${pretrained_model_name}" \
+     --last_checkpoint "${last_checkpoint_dir}" \
+     --output_dir "${serialization_dir}"
+
+ fi
+
+
+ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
+   $verbose && echo "stage 3: merge lora"
+   cd "${work_dir}" || exit 1;
+
+   python3 3.merge_lora.py \
+     --pretrained_model_name_or_path "${pretrained_models_dir}/${pretrained_model_name}" \
+     --adapter_name_or_path "${serialization_dir}/${last_checkpoint_dir}" \
+     --save_directory "${final_model_dir}"
+
+ fi
examples/reward_model/reward_model_gpt2_stack_exchange/stop.sh ADDED
@@ -0,0 +1,5 @@
+ #!/usr/bin/env bash
+
+ kill -9 `ps -aef | grep 'run.sh' | grep -v grep | awk '{print $2}'`
+
+ kill -9 `ps -aef | grep 'Transformers/bin/python3' | grep -v grep | awk '{print $2}'`
main.py ADDED
@@ -0,0 +1,128 @@
+ #!/usr/bin/python3
+ # -*- coding: utf-8 -*-
+ import os
+
+ from project_settings import project_path
+
+ os.environ["HUGGINGFACE_HUB_CACHE"] = (project_path / "cache/huggingface/hub").as_posix()
+
+ import gradio as gr
+ import torch
+ import torch.nn as nn
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
+
+
+ def calc_reward(pretrained_model_name_or_path: str,
+                 question: str,
+                 response_j: str,
+                 response_k: str = None,
+                 max_length: int = 512
+                 ):
+
+     tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path)
+     model = AutoModelForSequenceClassification.from_pretrained(
+         pretrained_model_name_or_path,
+         num_labels=1,
+     )
+     model.eval()
+
+     tokenizer.pad_token = tokenizer.eos_token
+     model.config.pad_token_id = tokenizer.eos_token_id
+
+     text_j = "Question: {}\n\nAnswer: {}".format(question, response_j)
+     text_k = "Question: {}\n\nAnswer: {}".format(question, response_k)
+
+     text_encoded = tokenizer(
+         [text_j, text_k],
+         padding="longest",
+         max_length=max_length,
+         truncation=True
+     )
+
+     input_ids = torch.tensor(text_encoded["input_ids"], dtype=torch.long)
+     attention_mask = torch.tensor(text_encoded["attention_mask"], dtype=torch.long)
+
+     outputs = model(input_ids=input_ids, attention_mask=attention_mask)
+     pooled_logits = outputs[0]
+     pooled_logits = pooled_logits.cpu().detach()
+     scores = nn.functional.sigmoid(pooled_logits)
+
+     scores = scores.tolist()
+     scores = [round(score[0], 6) for score in scores]
+     scores = [str(score) for score in scores]
+
+     result = ", ".join(scores)
+     return result
+
+
+ def main():
+
+     description = """
+     The scores for response_j and response_k are computed independently; two response boxes are provided to make comparison easy.
+     """
+
+     examples = [
+         [
+             """There seems to be a lot of software to control (or emulate) mouse input through the keyboard, but what about the opposite? Basically I'm looking for a way to emulate up/down/left/right clicks with mouse movement, at a fast rate (i.e. lots of very short and quick right clicks while I move the mouse to the right) If I have to learn some scripting language to do it, ok, but I don't know if it would even be possible. Note: This is meant to work on fullscreen, and having a way to turn it on/off with an F# key would be awesome! Thanks for your time :)""",
+             """If you're on Windows, what about the On-Screen Keyboard? It's found under **All Programs -> Accessories -> Accessibility** on XP (similar for Vista+) ![alt text](https://i.stack.imgur.com/nPFOE.png)""",
+             """If you want something where you can type with your mouse, then I suggest you take a look at [Dasher](http://www.inference.phy.cam.ac.uk/dasher/). That is, if I take your question title as the question. As I really don't quite understand your question.""",
+         ],
+         [
+             """I have installed the Java 3D API on PC via the exe installer, which simply created a new directory with `j3dcore.jar`, `vecmath.jar`, `j3dutils.jar` in a lib sub-directory and `j3dcore-ogl.dll` in a bin sub-directory. Netbeans had no issues and my code compiled and executed smoothly, however once I built my project and tried to run it from the command prompt I got an `UnsatisfiedLinkError` saying that `no j3dcore-ogl in java.library.path`. Google came to the rescue and gave me 3 viable solutions: * by copying the dll file into my JRE's bin directory * by adding the path of the dll file to the library path (`java -Djava.library.path=dllpath`) * load the dll in the program with `System.load()` (I couldn't get this one to work, actually) My question is: Is there an elegant solution to this problem, that I missed? It seems tedious that for each different PC someone would like to use this program on, he'd have to either copy the dll or add it to the library path before it can run. (Side question: How come Netbeans didn't have a problem with the dll?)""",
+             """*Edit - After re-reading your question, your issue sounds different. However I'm able to get my running like so, by just dropping all dll files in the same directory as the .bat file starting the java process:* *java -classpath ./YourJar.jar;./lib/j3dcore.jar;./lib/vecmath.jar;./lib/j3dutils.jar package.MainClass* *And that works on multiple user's PCs, so I know simply dropping it in the working directory works.* I believe it depends on the version of Java being used - 64 bit or 32 bit. The correct dll file (of the same name) needs to be in the working directory. I think I was getting a similar problem when the wrong dll was being used, and it's not OS-dependent (if your 64 bit OS has 32-bit Java installed, you'd need the 32 bit j3dcore-ogl.dll file). So the question is, which version of Java are you using *(when running outside of your IDE)*, and which version of the dll are you putting (if any) in the working directory? I don't need any dll files in my path settings to get this working on other's PCs, and did not use System.load(), and did NOT copy files into my user's JRE/bin directory - so I know this is possible without the 3 options you mention.""",
+             """I guess DLL are searched in all folders in %PATH% on windows. (LD\_LIBRARY\_PATH for UNIX flavors) Could you try by adding the path to dll to %path% variable? It appears that you are trying package a product with many jars as dependencies. You may benefit from [One-Jar](http://one-jar.sourceforge.net/index.php?page=details&file=native). It claims to have native dll support.""",
+         ],
+         [
+             """``` pt=new Date(2019,11,12,8,2,3) console.log(pt.getFullYear()," ",pt.getMonth()); ``` gives result `2019 " " 11` ``` console.log(pt.getFullYear()+" "+pt.getMonth()); ``` gives the result as `2019 11` What is the difference between using, and + in this example?""",
+             """``` console.log(pt.getFullYear()," ",pt.getMonth()); ``` The above example passes three separate arguments to console.log. What it outputs depends on how `console.log` is implemented. It has changed over time and is little bit different between browsers. When invoked with arguments like in the example, it has access to the variables and can display them with some magic depending on type, for example if they are arrays or objects. In your example it is displayed as: ``` 2019 " " 11 ``` where the numbers are in blue text, indicating that it was a variable of type number, and the empty string is shown in red, indicating that is was a string. Compare this to the following example, where it all is converted to a string before being passed to `console.log` in one argument: ``` console.log(pt.getFullYear()+" "+pt.getMonth()); ``` where it is displayed as ``` 2017 5 ``` with black text, indicating that it was passed as a string in the first parameter. The first parameter to `console.log` can be used as a format string, like `printf` in c and other languages. For example ``` console.log( "%d %d", pt.getFullYear(), pt.getMonth() ); ``` where %d is a place holder for a number. The output is in black text and gives the exact same output as your second example. ``` console.log("%d %d", pt.getFullYear(),pt.getMonth(), pt.getDate()); ``` In the example above, the year and month will be shown in black text, but the date will be in blue. This is because the format string only have two placeholders, but there are three arguments. `console.log` show the extra arguments, using the magic. Documentation: * [Standard](https://console.spec.whatwg.org/) * [Google Chrome](https://developers.google.com/web/tools/chrome-devtools/console/console-reference). * [Mozilla Firefox](https://developer.mozilla.org/en-US/docs/Web/API/Console) * [Microsoft Edge](https://msdn.microsoft.com/library/hh772169.aspx) * [Apple Safari](https://developer.apple.com/library/content/documentation/AppleApplications/Conceptual/Safari_Developer_Guide/Console/Console.html) * [Opera](http://www.opera.com/dragonfly/documentation/console/)""",
+             """console.log is part of the Console API and is accesible in various browsers. You can find its full documentation on [MDN](https://developer.mozilla.org/en-US/docs/Web/API/Console/log). It states that console log has the following parameters: ``` obj1 ... objN ``` > > A list of JavaScript objects to output. The string representations of > each of these objects are appended together in the order listed and > output. > > > So, when you concatenate the parameters you pass only one object to the function and when you pass multiple parameters `console.log` will do the concatenation for you.""",
+         ],
+         [
+             """I have a `<div id="content">`. I want to load the content from <http://vietduc24h.com> into my `div`: ``` <html> <head> <script type="text/javascript"> $(document).ready(function() { $("#content").attr("src","http://vietduc24h.com"); }) </script> </head> <body> <div id="content"></div> </body> </html ``` I don't want to use an iframe. How can I do this?""",
+             """Try this code with the jQuery `Load` function: ``` $('#content').load('http://vietduc24h.com', function() { alert('Load was performed.'); }); ``` If you encounter in security issues because of the Cross-Origin-Resource-Sharing policy than you have to use a proxy in your server code.""",
+             """Try this: ``` $("#content").html('<object data="http://vietduc24h.com">'); ``` Taken from [this answer](https://stackoverflow.com/a/9964050/646668).""",
+         ],
+     ]
+
+     demo = gr.Interface(
+         fn=calc_reward,
+         inputs=[
+             gr.Dropdown(choices=["qgyd2021/reward_model_gpt2_stack_exchange"],
+                         value="qgyd2021/reward_model_gpt2_stack_exchange",
+                         label="model_name",
+                         ),
+             gr.Text(label="question", lines=2, max_lines=200),
+             gr.Text(label="response_j", lines=4, max_lines=200),
+             gr.Text(label="response_k", lines=4, max_lines=200),
+         ],
+         outputs=[gr.Text(label="reward score", lines=1, max_lines=1)],
+         examples=examples,
+         cache_examples=False,
+         examples_per_page=6,
+         title="Reward Model GPT2 Stack Exchange",
+         description=description,
+     )
+     demo.launch()
+
+     return
+
+
+ if __name__ == '__main__':
+     main()
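
Note that `calc_reward` reloads the tokenizer and model from the Hub on every Gradio call, which keeps the function stateless but is slow. If that becomes a problem, a per-model cache is one option; a minimal sketch (the `MODEL_CACHE` dict and `get_model` helper are illustrative, not part of this Space):

    #!/usr/bin/python3
    # -*- coding: utf-8 -*-
    # Illustrative memoization: load each reward model once, reuse across calls.
    from transformers import AutoModelForSequenceClassification, AutoTokenizer

    MODEL_CACHE = {}

    def get_model(pretrained_model_name_or_path: str):
        if pretrained_model_name_or_path not in MODEL_CACHE:
            tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path)
            model = AutoModelForSequenceClassification.from_pretrained(
                pretrained_model_name_or_path,
                num_labels=1,
            )
            model.eval()
            tokenizer.pad_token = tokenizer.eos_token
            model.config.pad_token_id = tokenizer.eos_token_id
            MODEL_CACHE[pretrained_model_name_or_path] = (tokenizer, model)
        return MODEL_CACHE[pretrained_model_name_or_path]
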
project_settings.py ADDED
@@ -0,0 +1,12 @@
+ #!/usr/bin/python3
+ # -*- coding: utf-8 -*-
+ import os
+ from pathlib import Path
+
+
+ project_path = os.path.abspath(os.path.dirname(__file__))
+ project_path = Path(project_path)
+
+
+ if __name__ == '__main__':
+     pass
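
The scripts above rely on `project_path` being a `pathlib.Path` so they can build cache locations with the `/` operator, for example:

    # How the other scripts consume project_path (see 1.prepare_data.py and main.py).
    from project_settings import project_path

    print((project_path / "hub_datasets").as_posix())
    print((project_path / "cache/huggingface/hub").as_posix())
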
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ gradio==3.38.0
+ transformers==4.30.2
+ torch==1.13.1