# Launch 8-GPU distributed fine-tuning of voidful/phoneme_byt5_g2p_v1 on the
# NMSQA spoken-QA dataset via the HuggingFace question-answering example script.
# NOTE(review): torch.distributed.launch is deprecated upstream in favor of
# torchrun — kept as-is here to preserve the original invocation semantics.
python -m torch.distributed.launch \
  --nproc_per_node=8 transformers/examples/pytorch/question-answering/run_qa_nmsqa.py \
  --model_name_or_path voidful/phoneme_byt5_g2p_v1 \
  --dataset_name Splend1dchan/NMSQA_testupload2 \
  --dataloader_num_workers 4 \
  --do_train \
  --per_device_train_batch_size 1 \
  --gradient_accumulation_steps 4 \
  --learning_rate 3e-5 \
  --num_train_epochs 10 \
  --warmup_steps 500 \
  --logging_steps 50 \
  --max_seq_length 1024 \
  --doc_stride 256 \
  --save_strategy "epoch" \
  --ddp_find_unused_parameters=True \
  --output_dir ./models/phoneme_byt5_g2p_v1-1024-train-longer \
  --overwrite_output_dir
# Fixed: --gradient_accumulation_step -> --gradient_accumulation_steps
# (HfArgumentParser rejects the misspelled flag and the run never starts).
#--save_strategy "epoch" \
#allenai/longformer-base-4096
#voidful/phone-led-base-16384
# google/byt5-small
# google/long-t5-tglobal-base
# voidful/phoneme-longt5-global