fgaim committed on
Commit
0e04b95
1 Parent(s): 227f66d

Update data sources in run script

Browse files
Files changed (1) hide show
  1. run.sh +2 -2
run.sh CHANGED
@@ -3,8 +3,8 @@
3
  --model_type="roberta" \
4
  --config_name="./" \
5
  --tokenizer_name="./" \
6
- --dataset_name="mc4" \
7
- --dataset_config_name="sw" \
8
  --max_seq_length="128" \
9
  --weight_decay="0.025" \
10
  --per_device_train_batch_size="128" \
3
  --model_type="roberta" \
4
  --config_name="./" \
5
  --tokenizer_name="./" \
6
+ --train_file="/home/shared/clean_swahili/train_v1.4.txt" \
7
+ --validation_split_percentage="2" \
8
  --max_seq_length="128" \
9
  --weight_decay="0.025" \
10
  --per_device_train_batch_size="128" \