bayartsogt committed on
Commit
7b82e26
1 Parent(s): 2ddcd1e

Saving weights and logs of epoch 1

Browse files
Files changed (4) hide show
  1. .gitattributes +1 -0
  2. flax_model.msgpack +3 -0
  3. run_clm_flax.py +1 -0
  4. train_clm.sh +16 -0
.gitattributes CHANGED
@@ -14,3 +14,4 @@
14
  *.pb filter=lfs diff=lfs merge=lfs -text
15
  *.pt filter=lfs diff=lfs merge=lfs -text
16
  *.pth filter=lfs diff=lfs merge=lfs -text
 
 
14
  *.pb filter=lfs diff=lfs merge=lfs -text
15
  *.pt filter=lfs diff=lfs merge=lfs -text
16
  *.pth filter=lfs diff=lfs merge=lfs -text
17
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
flax_model.msgpack ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e8baa516c885994cf9604ab457577b5d4cb18915f3d35ab67799f4be6cf4a11
3
+ size 497764120
run_clm_flax.py ADDED
@@ -0,0 +1 @@
 
 
1
+ /home/bayartsogtyadamsuren/transformers/examples/flax/language-modeling/run_clm_flax.py
train_clm.sh ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ./run_clm_flax.py \
2
+ --output_dir="${MODEL_DIR}" \
3
+ --model_type="gpt2" \
4
+ --config_name="${MODEL_DIR}" \
5
+ --tokenizer_name="${MODEL_DIR}" \
6
+ --dataset_name="oscar" \
7
+ --dataset_config_name="unshuffled_deduplicated_mn" \
8
+ --do_train --do_eval \
9
+ --block_size="512" \
10
+ --per_device_train_batch_size="64" \
11
+ --per_device_eval_batch_size="64" \
12
+ --learning_rate="5e-3" --warmup_steps="1000" \
13
+ --adam_beta1="0.9" --adam_beta2="0.98" --weight_decay="0.01" \
14
+ --overwrite_output_dir \
15
+ --num_train_epochs="20" \
16
+ --push_to_hub