# Elron's picture
# Pushing deberta-v3-large-offensive to hub
# 5509d58
# Submit a fine-tuning run of microsoft/deberta-v3-large on the tweet_eval
# "offensive" CSV files via jbsub, writing the job log to train.log inside
# the run directory.
#
# Everything after the jbsub options is the job command line. The escaped
# semicolons (\;) are handed to jbsub as literal ";" arguments rather than
# ending the command in the submitting shell; the follow-up steps after the
# training command remove the "tb" logging directory, remove any
# "checkpoint-*" entries, and source run_test.sh from the run directory.
#
# NOTE(review): checkpoint-* is unquoted, so (with default shell options) the
# submitting shell expands it if matching paths already exist and otherwise
# passes the pattern through unchanged -- confirm which is intended.
run_dir=outputs/train/tweet_eval2/offensive/deberta-v3-large-offensive-lr7e-6-gas2-ls0.0
data_dir=data/tweet_eval/offensive

jbsub \
  -queue x86_6h \
  -cores 4+1 \
  -mem 30g \
  -require a100 \
  -o "${run_dir}/train.log" \
  /dccstor/tslm/envs/anaconda3/envs/tslm-gen/bin/python train_clf.py \
  --model_name_or_path microsoft/deberta-v3-large \
  --train_file "${data_dir}/train.csv" \
  --validation_file "${data_dir}/validation.csv" \
  --do_train \
  --do_eval \
  --per_device_train_batch_size 16 \
  --per_device_eval_batch_size 16 \
  --max_seq_length 256 \
  --learning_rate 7e-6 \
  --output_dir "${run_dir}" \
  --evaluation_strategy steps \
  --save_strategy no \
  --warmup_steps 50 \
  --num_train_epochs 10 \
  --overwrite_output_dir \
  --logging_steps 100 \
  --gradient_accumulation_steps 2 \
  --label_smoothing_factor 0.0 \
  --report_to clearml \
  --metric_for_best_model accuracy \
  --logging_dir "${run_dir}/tb" \
  \; rm -rf "${run_dir}/tb" \
  \; rm -rf "${run_dir}"/checkpoint-* \
  \; . "${run_dir}/run_test.sh"