cahya committed on
Commit
e5e9f73
1 Parent(s): 8696911

added run finetuning

Browse files
Files changed (1) hide show
  1. run_finetuning.sh +29 -0
run_finetuning.sh ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env bash
#
# run_finetuning.sh - launch ./run_clm_flax.py to fine-tune a GPT-2 model.
#
# Usage: run from the directory that holds run_clm_flax.py and
#        flax_model.msgpack; that directory is also passed as the config and
#        tokenizer location, and the output goes to its "finetuning2"
#        sub-directory.
#
# Exported environment (visible to the training process):
#   MODEL_DIR, WANDB_ENTITY, WANDB_PROJECT, WANDB_LOG_MODEL
#
# The exit status of this script is the exit status of run_clm_flax.py.

# Abort on errors, on unset variables and on failing pipeline stages so a
# broken working directory never yields paths such as "/finetuning2".
set -euo pipefail

# Assign first and export afterwards: "export VAR=$(cmd)" would hide a
# failing command substitution behind export's own (successful) status.
MODEL_DIR=$(pwd)
export MODEL_DIR

# Presumably read by the Weights & Biases integration inside the training
# script -- TODO confirm against run_clm_flax.py.
export WANDB_ENTITY="cahya"
export WANDB_PROJECT="gpt2-indonesian"
export WANDB_LOG_MODEL="true"

# Arguments are collected in an array so every value stays a single word
# (even if MODEL_DIR contains spaces) and no line-continuation backslash can
# be broken by trailing whitespace.
train_args=(
  # Model, configuration and tokenizer locations.
  --model_name_or_path="./flax_model.msgpack"
  --output_dir="${MODEL_DIR}/finetuning2"
  --model_type="gpt2"
  --config_name="${MODEL_DIR}"
  --tokenizer_name="${MODEL_DIR}"

  # Dataset selection.
  --dataset_name="./text_collection"
  --dataset_config_name="text_collection"
  --dataset_data_dir="/dataset/fiction/story_all"

  # Run both training and evaluation.
  --do_train --do_eval
  --block_size="512"
  --per_device_train_batch_size="8"
  --per_device_eval_batch_size="8"

  # Optimizer settings.
  # NOTE(review): the learning rate is 1e-7 -- confirm this is intentional.
  --learning_rate="0.0000001" --warmup_steps="1000"
  --adam_beta1="0.9" --adam_beta2="0.98" --weight_decay="0.01"

  # An existing output directory is overwritten.
  --overwrite_output_dir
  --num_train_epochs="20"
  --dataloader_num_workers="64"
  --preprocessing_num_workers="64"

  # Logging, checkpointing and evaluation all use a 1000-step interval.
  --logging_steps="1000"
  --save_steps="1000"
  --eval_steps="1000"
  --validation_split_percentage="10"
  --push_to_hub="false"
)

./run_clm_flax.py "${train_args[@]}"