CMD: -n 4 -g 4 -t 80 -o llama2-13b-ft-mc4_nl_cleaned_tiny/tiny-3e-4lr+1152tbs+1ep+0.1wd -p llama2-13b-ft-mc4_nl_cleaned -e --preprocessed_dataset /dodrio/scratch/projects/2023_005/llm-finetuning/preprocessed_datasets/mc4_nl_cleaned--tiny-Llama-2-70b-hf-4096 --learning_rate 3e-4 --model_name_or_path meta-llama/Llama-2-13b-hf --per_device_train_batch_size 12 --per_device_eval_batch_size 12 --gradient_accumulation_steps 6 --eval_accumulation_steps 24 --save_total_limit 3 --eval_steps 90 --save_steps 90 --logging_first_step --weight_decay 0.1 --lr_scheduler_type cosine --early_stopping_patience 4 --warmup_ratio 0.03 --deepspeed ds_config_zero2.json --use_flash_attention -r evocpo0z