yhavinga committed on
Commit
311feca
1 Parent(s): cd84b33

Add pytorch model at 320000 steps

Browse files
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "/home/yeb/data/gpt2-medium-dutch",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
@@ -11,7 +11,6 @@
11
  "initializer_range": 0.02,
12
  "layer_norm_epsilon": 1e-05,
13
  "model_type": "gpt2",
14
- "torch_dtype": "float32",
15
  "n_ctx": 1024,
16
  "n_embd": 1024,
17
  "n_head": 16,
@@ -35,6 +34,7 @@
35
  "max_length": 50
36
  }
37
  },
 
38
  "transformers_version": "4.13.0",
39
  "use_cache": true,
40
  "vocab_size": 50257
 
1
  {
2
+ "_name_or_path": ".",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
 
11
  "initializer_range": 0.02,
12
  "layer_norm_epsilon": 1e-05,
13
  "model_type": "gpt2",
 
14
  "n_ctx": 1024,
15
  "n_embd": 1024,
16
  "n_head": 16,
 
34
  "max_length": 50
35
  }
36
  },
37
+ "torch_dtype": "float32",
38
  "transformers_version": "4.13.0",
39
  "use_cache": true,
40
  "vocab_size": 50257
flax_to_pytorch.py CHANGED
@@ -13,7 +13,7 @@ model_fx = FlaxGPT2LMHeadModel.from_pretrained(".")
13
  # model_fx.params = to_f32(model_fx.params)
14
  # model_fx.save_pretrained("./fx")
15
  model_pt = GPT2LMHeadModel.from_pretrained(".", from_flax=True)
16
- model_pt.save_pretrained("./pt")
17
  input_ids = np.asarray(2 * [128 * [0]], dtype=np.int32)
18
  input_ids_pt = torch.tensor(input_ids)
19
  logits_pt = model_pt(input_ids_pt).logits
 
13
  # model_fx.params = to_f32(model_fx.params)
14
  # model_fx.save_pretrained("./fx")
15
  model_pt = GPT2LMHeadModel.from_pretrained(".", from_flax=True)
16
+ model_pt.save_pretrained(".")
17
  input_ids = np.asarray(2 * [128 * [0]], dtype=np.int32)
18
  input_ids_pt = torch.tensor(input_ids)
19
  logits_pt = model_pt(input_ids_pt).logits
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f6576b3366a236813f2b96767c8eb783a6d52ed8aa71222557e56edeae404cf0
3
  size 1444576537
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2abd6b504ff54d4765bd621491f332f9f75b1f869c414e5f301d8ec9c054a929
3
  size 1444576537
run_gpt.sh CHANGED
@@ -15,6 +15,7 @@ python run_clm_flax.py \
15
  --output_dir="${MODEL_PATH}" \
16
  --model_type="gpt2" \
17
  --config_name="${MODEL_PATH}" \
 
18
  --tokenizer_name="${MODEL_PATH}" \
19
  --preprocessing_num_workers="96" \
20
  --do_train --do_eval \
@@ -28,7 +29,7 @@ python run_clm_flax.py \
28
  --overwrite_output_dir \
29
  --num_train_epochs="1" \
30
  --logging_steps="500" \
31
- --save_steps="40000" \
32
  --eval_steps="2500"
33
 
34
  # \
 
15
  --output_dir="${MODEL_PATH}" \
16
  --model_type="gpt2" \
17
  --config_name="${MODEL_PATH}" \
18
+ --model_name_or_path="${MODEL_PATH}" \
19
  --tokenizer_name="${MODEL_PATH}" \
20
  --preprocessing_num_workers="96" \
21
  --do_train --do_eval \
 
29
  --overwrite_output_dir \
30
  --num_train_epochs="1" \
31
  --logging_steps="500" \
32
+ --save_steps="20001" \
33
  --eval_steps="2500"
34
 
35
  # \
runs/Dec24/events.out.tfevents.1640112885.t1v-n-f9cfcc28-w-0.187197.0.v2 DELETED
@@ -1 +0,0 @@
1
- ../events.out.tfevents.1640112885.t1v-n-f9cfcc28-w-0.187197.0.v2
 
 
runs/events.out.tfevents.1640112885.t1v-n-f9cfcc28-w-0.187197.0.v2 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f4b57034a982516d8b60834fa3bd1e8af2277469ad9973b3ec86241cb292a89
3
- size 17870829
 
 
 
 
runs/events.out.tfevents.1640332964.t1v-n-f9cfcc28-w-0.384322.0.v2 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78a17fcd88365f6bcb23b316b06024faf91960c419d1cab9d39b055a0ae3fcb8
3
- size 49426035
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eda007c25947ad3f969a3fc25f74a6182e0cbfb3e0aadc6ec67019ce38e1e545
3
+ size 50470447