Update run config
Browse files — run_gpt.sh (+4 −2)
run_gpt.sh
CHANGED
@@ -4,7 +4,7 @@ export HF_PROJECT="gpt2-medium-dutch"
 4 |
 5 | # Variables for training the tokenizer and creating the config
 6 | export VOCAB_SIZE="50257"
 7 | - export DATASET="   [old value truncated in page extraction]
 8 | export DATASET_CONFIG="full" # Config of the dataset in the Huggingface Hub
 9 | export DATASET_SPLIT="train" # Split to use for training tokenizer and model
10 | export TEXT_FIELD="text" # Field containing the text to be used for training
@@ -26,7 +26,9 @@ python run_clm_flax.py \
26 | --learning_rate="0.0024" --warmup_steps="5000" \
27 | --adam_beta1="0.9" --adam_beta2="0.98" --weight_decay="0.01" \
28 | --overwrite_output_dir \
29 | - --num_train_epochs="   [old value truncated in page extraction]
30 | --logging_steps="500" \
31 | --save_steps="40000" \
32 | --eval_steps="2500" \
|
|
 4 |
 5 | # Variables for training the tokenizer and creating the config
 6 | export VOCAB_SIZE="50257"
 7 | + export DATASET="/home/yeb/data/mc4_nl_cleaned/mc4_nl_cleaned.py" # Name of the dataset in the Huggingface Hub
 8 | export DATASET_CONFIG="full" # Config of the dataset in the Huggingface Hub
 9 | export DATASET_SPLIT="train" # Split to use for training tokenizer and model
10 | export TEXT_FIELD="text" # Field containing the text to be used for training
|
26 | --learning_rate="0.0024" --warmup_steps="5000" \
27 | --adam_beta1="0.9" --adam_beta2="0.98" --weight_decay="0.01" \
28 | --overwrite_output_dir \
29 | + --num_train_epochs="2" \
30 | + --dataloader_num_workers="64" \
31 | + --preprocessing_num_workers="64" \
32 | --logging_steps="500" \
33 | --save_steps="40000" \
34 | --eval_steps="2500" \