#!/usr/bin/env bash
set -eou pipefail
# Use all 8 GPUs; --world-size below should match the number of visible devices.
export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
# Print verbose diagnostics from torch.distributed.
export TORCH_DISTRIBUTED_DEBUG="INFO"
# Train a 16-layer transformer LM on the LibriSpeech LM corpus (BPE-500 tokens)
# with DDP across 8 GPUs.
python ./transformer_lm/train.py \
  --start-epoch 0 \
  --world-size 8 \
  --exp-dir transformer_lm/exp_full_libri_16layer_8gpu \
  --num-epochs 20 \
  --lm-data ./transformer_lm/libri_lm_training_bpe500/sorted-lm-data-libri-lm_maxlen200.pt \
  --lm-data-valid ./transformer_lm/libri_lm_training_bpe500/sorted_lm_data-valid.pt \
  --use-fp16 0 \
  --num-layers 16 \
  --batch-size 70
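
# If fewer GPUs are available, the same command can be adapted by shrinking
# CUDA_VISIBLE_DEVICES and --world-size together (a sketch, assuming
# --world-size must equal the number of visible GPUs; the exp-dir name below
# is hypothetical). Left commented out so the script runs unchanged:
#
# export CUDA_VISIBLE_DEVICES="0,1"
# python ./transformer_lm/train.py \
#   --start-epoch 0 \
#   --world-size 2 \
#   --exp-dir transformer_lm/exp_full_libri_16layer_2gpu \
#   --num-epochs 20 \
#   --lm-data ./transformer_lm/libri_lm_training_bpe500/sorted-lm-data-libri-lm_maxlen200.pt \
#   --lm-data-valid ./transformer_lm/libri_lm_training_bpe500/sorted_lm_data-valid.pt \
#   --use-fp16 0 \
#   --num-layers 16 \
#   --batch-size 70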