toanbku's picture
Create README.md
4348062
Training log: https://wandb.ai/toanbku/reward-model/runs/2ekuy6lg/overview
```bash
deepspeed --include=localhost:0 --master_port 61000 trainer_rm.py --config \
defaults_rm oasst-rm-2.1-pythia-1.4b \
--cache_dir /home/ubuntu/OA/model/model_training/.cache \
--per_device_eval_batch_size 1 --per_device_train_batch_size 1 \
--wandb_entity toanbku --deepspeed
```
```yaml
oasst-rm-2.1-pythia-1.4b:
  is_reward_model: true
  pooling: last
  datasets:
    - oasst_export:
        lang: "en"
        hf_dataset_name: toanbku/oa-df
        val_split: 0.1
  use_custom_sampler: true
  sort_by_length: false
  model_name: OpenAssistant/oasst-rm-2.1-pythia-1.4b-epoch-2.5
  learning_rate: 8e-6
  residual_dropout: 0.01
  weight_decay: 0.0
  dtype: float32
  max_length: 2048
  use_flash_attention: true
  warmup_steps: 2
  gradient_accumulation_steps: 2
  per_device_train_batch_size: 1
  per_device_eval_batch_size: 2
  num_train_epochs: 2
  eval_steps: 50
  save_steps: 100
  use_system_tag: false
  system_property_dropout: 0.5
  system_add_length: false
```