Add pytorch model at 320000 steps
- config.json +2 -2
- flax_to_pytorch.py +1 -1
- pytorch_model.bin +1 -1
- run_gpt.sh +2 -1
- runs/Dec24/events.out.tfevents.1640112885.t1v-n-f9cfcc28-w-0.187197.0.v2 +0 -1
- runs/events.out.tfevents.1640112885.t1v-n-f9cfcc28-w-0.187197.0.v2 +0 -3
- runs/events.out.tfevents.1640332964.t1v-n-f9cfcc28-w-0.384322.0.v2 +2 -2
config.json
CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "
+  "_name_or_path": ".",
   "activation_function": "gelu_new",
   "architectures": [
     "GPT2LMHeadModel"
@@ -11,7 +11,6 @@
   "initializer_range": 0.02,
   "layer_norm_epsilon": 1e-05,
   "model_type": "gpt2",
-  "torch_dtype": "float32",
   "n_ctx": 1024,
   "n_embd": 1024,
   "n_head": 16,
@@ -35,6 +34,7 @@
       "max_length": 50
     }
   },
+  "torch_dtype": "float32",
   "transformers_version": "4.13.0",
   "use_cache": true,
   "vocab_size": 50257
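As a quick reference, a minimal sketch (assuming the tokenizer files sit next to config.json and pytorch_model.bin in the repository root) of loading the converted checkpoint; the torch_dtype entry only records the dtype the weights were saved in, so moving it within config.json does not change loading behavior:

```python
from transformers import GPT2LMHeadModel, GPT2TokenizerFast

# "." assumes the repository root containing config.json and pytorch_model.bin.
model = GPT2LMHeadModel.from_pretrained(".")
tokenizer = GPT2TokenizerFast.from_pretrained(".")

# torch_dtype is informational metadata ("float32" here); architecture fields
# such as n_embd / n_head drive the actual model construction.
print(model.config.torch_dtype, model.config.n_embd, model.config.n_head)
```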
flax_to_pytorch.py
CHANGED
@@ -13,7 +13,7 @@ model_fx = FlaxGPT2LMHeadModel.from_pretrained(".")
 # model_fx.params = to_f32(model_fx.params)
 # model_fx.save_pretrained("./fx")
 model_pt = GPT2LMHeadModel.from_pretrained(".", from_flax=True)
-model_pt.save_pretrained("
+model_pt.save_pretrained(".")
 input_ids = np.asarray(2 * [128 * [0]], dtype=np.int32)
 input_ids_pt = torch.tensor(input_ids)
 logits_pt = model_pt(input_ids_pt).logits
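The diff only touches the save path; below is a hedged sketch of how a Flax-to-PyTorch conversion like this is typically validated end to end (the Flax logits comparison and the allclose tolerance are assumptions, not part of the committed script):

```python
import numpy as np
import torch
from transformers import FlaxGPT2LMHeadModel, GPT2LMHeadModel

# Load the Flax weights and re-export them as a PyTorch checkpoint in place.
model_fx = FlaxGPT2LMHeadModel.from_pretrained(".")
model_pt = GPT2LMHeadModel.from_pretrained(".", from_flax=True)
model_pt.save_pretrained(".")

# Sanity check: both frameworks should give near-identical logits on a dummy batch.
input_ids = np.asarray(2 * [128 * [0]], dtype=np.int32)
logits_fx = model_fx(input_ids).logits
logits_pt = model_pt(torch.tensor(input_ids)).logits
assert np.allclose(np.asarray(logits_fx), logits_pt.detach().numpy(), atol=1e-3)
```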
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:2abd6b504ff54d4765bd621491f332f9f75b1f869c414e5f301d8ec9c054a929
 size 1444576537
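pytorch_model.bin is tracked with Git LFS, so the file in the repository is only a pointer: oid is the SHA-256 of the real weights blob and size its byte count. A small sketch for verifying a locally resolved copy against the pointer (local path assumed):

```python
import hashlib
import os

path = "pytorch_model.bin"  # assumed: the resolved LFS file, not the pointer

sha256 = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha256.update(chunk)

print(sha256.hexdigest())     # should equal the oid in the pointer file
print(os.path.getsize(path))  # should equal 1444576537
```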
run_gpt.sh
CHANGED
@@ -15,6 +15,7 @@ python run_clm_flax.py \
     --output_dir="${MODEL_PATH}" \
     --model_type="gpt2" \
     --config_name="${MODEL_PATH}" \
+    --model_name_or_path="${MODEL_PATH}" \
     --tokenizer_name="${MODEL_PATH}" \
     --preprocessing_num_workers="96" \
     --do_train --do_eval \
@@ -28,7 +29,7 @@ python run_clm_flax.py \
     --overwrite_output_dir \
     --num_train_epochs="1" \
     --logging_steps="500" \
-    --save_steps="
+    --save_steps="20001" \
     --eval_steps="2500"

 # \
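Presumably the added --model_name_or_path="${MODEL_PATH}" makes run_clm_flax.py initialize from the checkpoint already in ${MODEL_PATH} instead of building a fresh model from the config alone, so training continues from the saved weights, while --save_steps="20001" sets how often intermediate checkpoints are written.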
runs/Dec24/events.out.tfevents.1640112885.t1v-n-f9cfcc28-w-0.187197.0.v2
DELETED
@@ -1 +0,0 @@
-../events.out.tfevents.1640112885.t1v-n-f9cfcc28-w-0.187197.0.v2
runs/events.out.tfevents.1640112885.t1v-n-f9cfcc28-w-0.187197.0.v2
DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:3f4b57034a982516d8b60834fa3bd1e8af2277469ad9973b3ec86241cb292a89
-size 17870829
runs/events.out.tfevents.1640332964.t1v-n-f9cfcc28-w-0.384322.0.v2
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:eda007c25947ad3f969a3fc25f74a6182e0cbfb3e0aadc6ec67019ce38e1e545
+size 50470447