t5-vae-wiki / train.sh
Fraser's picture
add dataset scripts
2095da4
# Launch train.py with the project virtualenv's Python for the run named by
# RUN_NAME. Run this from the directory that contains ./venv and train.py:
# both are referenced by relative path.
#
# RUN_NAME is exported, so it is also present in train.py's environment; within
# this script it only selects the output directory (output/<RUN_NAME>).
export RUN_NAME="single_latent"

# TODO: update to not use this tokenizer; use the GPT-2 one instead.
# NOTE(review): --train_file="INVALID.txt" looks like a placeholder -- confirm
# and point it at the real training file before running.
#
# Every option uses the --name="value" form for consistency, each flag is
# passed exactly once, and the last argument has no trailing backslash so that
# whatever follows the command can never be absorbed into it.
./venv/bin/python train.py \
  --t5_model_name_or_path="t5-base" \
  --output_dir="output/${RUN_NAME}" \
  --overwrite_output_dir \
  --do_train \
  --n_latent_tokens="1" \
  --latent_token_size="32" \
  --save_steps="2000" \
  --block_size="128" \
  --per_device_train_batch_size="100" \
  --train_file="INVALID.txt" \
  --num_train_epochs="1"
# 200 batch size, 128 sequence len: ? (breaks)
# 100 batch size, 128 sequence len: 252:38:58
# 10 batch size, 128 sequence len: 281:32:53
# Have ~12 hours to train and want 3 saves, so save once every 4 hours.