#!/usr/bin/env bash
# Launch a single-latent-token training run of train.py (T5-base backbone).
# Output (checkpoints, logs) goes to output/${RUN_NAME}.
set -euo pipefail

# Exported so train.py / downstream tooling can read it if needed.
export RUN_NAME=single_latent

# TODO(author): update to not use tokenizer, instead use gpt2 one
# NOTE(review): --train_file is still a placeholder ("INVALID.txt") — point it
# at the real training corpus before running.
./venv/bin/python train.py \
  --t5_model_name_or_path="t5-base" \
  --output_dir="output/${RUN_NAME}" \
  --overwrite_output_dir \
  --do_train \
  --n_latent_tokens 1 \
  --latent_token_size 32 \
  --save_steps="2000" \
  --block_size="128" \
  --per_device_train_batch_size="100" \
  --train_file="INVALID.txt" \
  --num_train_epochs="1"
# Timing notes (batch size, seq len -> observed runtime):
# 200 batch size, 128 sequence len: ? (breaks)
# 100 batch size, 128 sequence len: 252:38:58
# 10 batch size, 128 sequence len: 281:32:53
# Got ~12 hours to train, want 3 saves, so one save every 4 hours