Saving weights and logs of step 1000

Files changed (4)

config.json CHANGED Viewed

@@ -19,7 +19,7 @@
   "num_hidden_layers": 12,
   "pad_token_id": 1,
   "position_embedding_type": "absolute",
-  "transformers_version": "4.16.0.dev0",
   "type_vocab_size": 1,
   "use_cache": true,
   "vocab_size": 50265

   "num_hidden_layers": 12,
   "pad_token_id": 1,
   "position_embedding_type": "absolute",
+  "transformers_version": "4.15.0.dev0",
   "type_vocab_size": 1,
   "use_cache": true,
   "vocab_size": 50265

flax_model.msgpack ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:c8a3c81da267a58c6a608e02805349a57f920f73b26045f844af75f892dd9866
+size 498796983

run_mlm_flax.py CHANGED Viewed

@@ -777,10 +777,12 @@ def main():
                 # save checkpoint after each epoch and push checkpoint to the hub
                 if jax.process_index() == 0:
                     params = jax.device_get(jax.tree_map(lambda x: x[0], state.params))
-                    model.save_pretrained(training_args.output_dir, params=params)
                     tokenizer.save_pretrained(training_args.output_dir)
-                    if training_args.push_to_hub:
-                        repo.push_to_hub(commit_message=f"Saving weights and logs of step {cur_step}", blocking=False)
     # Eval after training
     if training_args.do_eval:

                 # save checkpoint after each epoch and push checkpoint to the hub
                 if jax.process_index() == 0:
                     params = jax.device_get(jax.tree_map(lambda x: x[0], state.params))
+                    model.save_pretrained(training_args.output_dir,
+                            params=params,
+                            push_to_hub=training_args.push_to_hub,
+                            commit_message=f"Saving weights and logs of step {cur_step}",
+                    )
                     tokenizer.save_pretrained(training_args.output_dir)
     # Eval after training
     if training_args.do_eval:

tokenizer.json CHANGED Viewed

The diff for this file is too large to render; see the raw diff.