Patrick von Platen committed on
Commit
015a6fb
1 Parent(s): 148a864

Saving weights and logs of step 11

Browse files
Files changed (26) hide show
  1. config.json +25 -0
  2. events.out.tfevents.1625437573.t1v-n-71556209-w-0.89432.3.v2 +0 -0
  3. events.out.tfevents.1625437701.t1v-n-71556209-w-0.90941.3.v2 +0 -0
  4. events.out.tfevents.1625438030.t1v-n-71556209-w-0.92615.3.v2 +0 -0
  5. events.out.tfevents.1625438668.t1v-n-71556209-w-0.94558.3.v2 +0 -0
  6. events.out.tfevents.1625438693.t1v-n-71556209-w-0.95985.3.v2 +0 -0
  7. events.out.tfevents.1625440116.t1v-n-71556209-w-0.1109.3.v2 +0 -0
  8. events.out.tfevents.1625440164.t1v-n-71556209-w-0.2627.3.v2 +0 -0
  9. events.out.tfevents.1625440256.t1v-n-71556209-w-0.4267.3.v2 +0 -0
  10. events.out.tfevents.1625440361.t1v-n-71556209-w-0.5831.3.v2 +0 -0
  11. events.out.tfevents.1625440511.t1v-n-71556209-w-0.7787.3.v2 +0 -0
  12. events.out.tfevents.1625440601.t1v-n-71556209-w-0.9302.3.v2 +0 -0
  13. events.out.tfevents.1625481291.t1v-n-71556209-w-0.45066.3.v2 +0 -0
  14. events.out.tfevents.1625481405.t1v-n-71556209-w-0.46604.3.v2 +0 -0
  15. events.out.tfevents.1625481546.t1v-n-71556209-w-0.48135.3.v2 +0 -0
  16. events.out.tfevents.1625481641.t1v-n-71556209-w-0.49690.3.v2 +0 -0
  17. events.out.tfevents.1625482384.t1v-n-71556209-w-0.52195.3.v2 +0 -0
  18. events.out.tfevents.1625482495.t1v-n-71556209-w-0.53785.3.v2 +0 -0
  19. events.out.tfevents.1625482589.t1v-n-71556209-w-0.55378.3.v2 +0 -0
  20. events.out.tfevents.1625482771.t1v-n-71556209-w-0.57056.3.v2 +0 -0
  21. events.out.tfevents.1625482925.t1v-n-71556209-w-0.58649.3.v2 +0 -0
  22. events.out.tfevents.1625483165.t1v-n-71556209-w-0.60396.3.v2 +0 -0
  23. flax_model.msgpack +3 -0
  24. run.sh +22 -0
  25. run_mlm_flax_stream.py +1 -0
  26. tokenizer.json +0 -0
config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "RobertaForMaskedLM"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 0,
7
+ "eos_token_id": 2,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 32,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 64,
14
+ "layer_norm_eps": 1e-05,
15
+ "max_position_embeddings": 514,
16
+ "model_type": "roberta",
17
+ "num_attention_heads": 2,
18
+ "num_hidden_layers": 2,
19
+ "pad_token_id": 1,
20
+ "position_embedding_type": "absolute",
21
+ "transformers_version": "4.9.0.dev0",
22
+ "type_vocab_size": 1,
23
+ "use_cache": true,
24
+ "vocab_size": 50265
25
+ }
events.out.tfevents.1625437573.t1v-n-71556209-w-0.89432.3.v2 ADDED
Binary file (40 Bytes). View file
 
events.out.tfevents.1625437701.t1v-n-71556209-w-0.90941.3.v2 ADDED
Binary file (40 Bytes). View file
 
events.out.tfevents.1625438030.t1v-n-71556209-w-0.92615.3.v2 ADDED
Binary file (40 Bytes). View file
 
events.out.tfevents.1625438668.t1v-n-71556209-w-0.94558.3.v2 ADDED
Binary file (40 Bytes). View file
 
events.out.tfevents.1625438693.t1v-n-71556209-w-0.95985.3.v2 ADDED
Binary file (40 Bytes). View file
 
events.out.tfevents.1625440116.t1v-n-71556209-w-0.1109.3.v2 ADDED
Binary file (40 Bytes). View file
 
events.out.tfevents.1625440164.t1v-n-71556209-w-0.2627.3.v2 ADDED
Binary file (40 Bytes). View file
 
events.out.tfevents.1625440256.t1v-n-71556209-w-0.4267.3.v2 ADDED
Binary file (40 Bytes). View file
 
events.out.tfevents.1625440361.t1v-n-71556209-w-0.5831.3.v2 ADDED
Binary file (40 Bytes). View file
 
events.out.tfevents.1625440511.t1v-n-71556209-w-0.7787.3.v2 ADDED
Binary file (40 Bytes). View file
 
events.out.tfevents.1625440601.t1v-n-71556209-w-0.9302.3.v2 ADDED
Binary file (40 Bytes). View file
 
events.out.tfevents.1625481291.t1v-n-71556209-w-0.45066.3.v2 ADDED
Binary file (40 Bytes). View file
 
events.out.tfevents.1625481405.t1v-n-71556209-w-0.46604.3.v2 ADDED
Binary file (40 Bytes). View file
 
events.out.tfevents.1625481546.t1v-n-71556209-w-0.48135.3.v2 ADDED
Binary file (40 Bytes). View file
 
events.out.tfevents.1625481641.t1v-n-71556209-w-0.49690.3.v2 ADDED
Binary file (40 Bytes). View file
 
events.out.tfevents.1625482384.t1v-n-71556209-w-0.52195.3.v2 ADDED
Binary file (40 Bytes). View file
 
events.out.tfevents.1625482495.t1v-n-71556209-w-0.53785.3.v2 ADDED
Binary file (108 Bytes). View file
 
events.out.tfevents.1625482589.t1v-n-71556209-w-0.55378.3.v2 ADDED
Binary file (1.7 kB). View file
 
events.out.tfevents.1625482771.t1v-n-71556209-w-0.57056.3.v2 ADDED
Binary file (1.7 kB). View file
 
events.out.tfevents.1625482925.t1v-n-71556209-w-0.58649.3.v2 ADDED
Binary file (1.7 kB). View file
 
events.out.tfevents.1625483165.t1v-n-71556209-w-0.60396.3.v2 ADDED
Binary file (1.84 kB). View file
 
flax_model.msgpack ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b58c7d33e4ac33d181533f210f8d095d47e64d263252cb570125e8ff48f508ac
3
+ size 6775349
run.sh ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+ ./run_mlm_flax_stream.py \
3
+ --output_dir="./" \
4
+ --model_type="roberta" \
5
+ --config_name="./" \
6
+ --tokenizer_name="./" \
7
+ --dataset_name="oscar" \
8
+ --dataset_config_name="unshuffled_deduplicated_en" \
9
+ --max_seq_length="128" \
10
+ --weight_decay="0.01" \
11
+ --per_device_train_batch_size="2" \
12
+ --per_device_eval_batch_size="2" \
13
+ --learning_rate="3e-4" \
14
+ --warmup_steps="1000" \
15
+ --overwrite_output_dir \
16
+ --adam_beta1="0.9" \
17
+ --adam_beta2="0.98" \
18
+ --num_train_steps="100" \
19
+ --num_eval_samples="20" \
20
+ --logging_steps="10" \
21
+ --eval_steps="10" \
22
+ --push_to_hub
run_mlm_flax_stream.py ADDED
@@ -0,0 +1 @@
 
 
1
+ /home/patrick/transformers/examples/research_projects/jax-projects/dataset-streaming/run_mlm_flax_stream.py
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff