flax-community
/

papuGaPT2-large

Text Generation

Inference Endpoints

text-generation-inference

Model card · Files and versions · Community

miwojc committed on Jul 10, 2021

Commit

233af6c

•

1 Parent(s): 0c42b31

Saving weights and logs of step 30000

Files changed (3) hide show

flax_model.msgpack +1 -1
pretrain.sh +5 -3
run_clm_flax.py +2 -2

flax_model.msgpack CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c95d9bb38c83f3a8dd2a2300154d85d29447ff3d924a9327a8b732bfb55c0a66
 size 3096134690

 version https://git-lfs.github.com/spec/v1
+oid sha256:2d95ccbc5b88a04092b2ba965a20d6670dc94adfcef72afcf8d59d702a4e382b
 size 3096134690

pretrain.sh CHANGED Viewed

@@ -9,10 +9,12 @@
     --block_size="512" \
     --per_device_train_batch_size="4" \
     --per_device_eval_batch_size="4" \
-    --learning_rate="2e-5" --warmup_steps="16000" \
     --adam_beta1="0.9" --adam_beta2="0.98" --weight_decay="0.01" \
     --overwrite_output_dir \
-    --num_train_epochs="5" \
-    --logging_steps="80000" \
     --adafactor \
     --push_to_hub

     --block_size="512" \
     --per_device_train_batch_size="4" \
     --per_device_eval_batch_size="4" \
+    --learning_rate="3e-4" --warmup_steps="16000" \
     --adam_beta1="0.9" --adam_beta2="0.98" --weight_decay="0.01" \
     --overwrite_output_dir \
+    --num_train_epochs="2" \
+    --logging_steps="15000" \
+    --save_steps="30000" \
+    --eval_steps="30000" \
     --adafactor \
     --push_to_hub

run_clm_flax.py CHANGED Viewed

@@ -398,7 +398,8 @@ def main():
         total_length = len(concatenated_examples[list(examples.keys())[0]])
         # We drop the small remainder, we could add padding if the model supported it instead of this drop, you can
         # customize this part to your needs.
-        total_length = (total_length // block_size) * block_size
         # Split by chunks of max_len.
         result = {
             k: [t[i : i + block_size] for i in range(0, total_length, block_size)]
@@ -621,7 +622,6 @@ def main():
                 # Save metrics
                 if has_tensorboard and jax.process_index() == 0:
-                    cur_step = epoch * (len(train_dataset) // train_batch_size)
                     write_eval_metric(summary_writer, eval_metrics, cur_step)
             if cur_step % training_args.save_steps == 0 and cur_step > 0:

         total_length = len(concatenated_examples[list(examples.keys())[0]])
         # We drop the small remainder, we could add padding if the model supported it instead of this drop, you can
         # customize this part to your needs.
+        if total_length >= block_size:
+            total_length = (total_length // block_size) * block_size
         # Split by chunks of max_len.
         result = {
             k: [t[i : i + block_size] for i in range(0, total_length, block_size)]
                 # Save metrics
                 if has_tensorboard and jax.process_index() == 0:
                     write_eval_metric(summary_writer, eval_metrics, cur_step)
             if cur_step % training_args.save_steps == 0 and cur_step > 0: