yhavinga committed on
Commit
aa27a25
1 Parent(s): a58a796

Further pre-training to 2650K steps

Browse files
Files changed (23) hide show
  1. config.gin +1 -1
  2. config.json +3 -2
  3. flax_model.msgpack +2 -2
  4. generation_config.json +7 -0
  5. model.safetensors +3 -0
  6. train/events.out.tfevents.1673860280.t1v-n-e0ca6cd3-w-0.10042.0.v2 +2 -2
  7. train/events.out.tfevents.1698317329.t1v-n-5ae5f195-w-0.26727.0.v2 +3 -0
  8. train/events.out.tfevents.1698358025.t1v-n-5ae5f195-w-0.77780.0.v2 +3 -0
  9. train/events.out.tfevents.1698965229.t1v-n-5ae5f195-w-0.783722.0.v2 +3 -0
  10. train/events.out.tfevents.1699608292.t1v-n-20277917-w-0.15955.0.v2 +3 -0
  11. train/events.out.tfevents.1699696557.t1v-n-20277917-w-0.199146.0.v2 +3 -0
  12. training_eval/mc4_en_nl_ul2_denoising/events.out.tfevents.1673860280.t1v-n-e0ca6cd3-w-0.10042.1.v2 +2 -2
  13. training_eval/mc4_en_nl_ul2_denoising/events.out.tfevents.1698317329.t1v-n-5ae5f195-w-0.26727.1.v2 +3 -0
  14. training_eval/mc4_en_nl_ul2_denoising/events.out.tfevents.1698358025.t1v-n-5ae5f195-w-0.77780.1.v2 +3 -0
  15. training_eval/mc4_en_nl_ul2_denoising/events.out.tfevents.1698965229.t1v-n-5ae5f195-w-0.783722.1.v2 +3 -0
  16. training_eval/mc4_en_nl_ul2_denoising/events.out.tfevents.1699608292.t1v-n-20277917-w-0.15955.1.v2 +3 -0
  17. training_eval/mc4_en_nl_ul2_denoising/events.out.tfevents.1699696557.t1v-n-20277917-w-0.199146.1.v2 +3 -0
  18. training_eval/ul2_en_nl_mc4_nedd_wiki_news_mix_1/events.out.tfevents.1673860280.t1v-n-e0ca6cd3-w-0.10042.2.v2 +2 -2
  19. training_eval/ul2_en_nl_mc4_nedd_wiki_news_mix_1/events.out.tfevents.1698317329.t1v-n-5ae5f195-w-0.26727.2.v2 +3 -0
  20. training_eval/ul2_en_nl_mc4_nedd_wiki_news_mix_1/events.out.tfevents.1698358025.t1v-n-5ae5f195-w-0.77780.2.v2 +3 -0
  21. training_eval/ul2_en_nl_mc4_nedd_wiki_news_mix_1/events.out.tfevents.1698965229.t1v-n-5ae5f195-w-0.783722.2.v2 +3 -0
  22. training_eval/ul2_en_nl_mc4_nedd_wiki_news_mix_1/events.out.tfevents.1699608292.t1v-n-20277917-w-0.15955.2.v2 +3 -0
  23. training_eval/ul2_en_nl_mc4_nedd_wiki_news_mix_1/events.out.tfevents.1699696557.t1v-n-20277917-w-0.199146.2.v2 +3 -0
config.gin CHANGED
@@ -26,7 +26,7 @@ OPTIMIZER = @adafactor.Adafactor()
26
  RANDOM_SEED = None
27
  SHUFFLE_TRAIN_EXAMPLES = True
28
  TASK_FEATURE_LENGTHS = {'inputs': 512, 'targets': 512}
29
- TRAIN_STEPS = 1000000
30
  USE_CACHED_TASKS = False
31
  USE_HARDWARE_RNG = False
32
  VOCABULARY = @seqio.SentencePieceVocabulary()
26
  RANDOM_SEED = None
27
  SHUFFLE_TRAIN_EXAMPLES = True
28
  TASK_FEATURE_LENGTHS = {'inputs': 512, 'targets': 512}
29
+ TRAIN_STEPS = 4000000
30
  USE_CACHED_TASKS = False
31
  USE_HARDWARE_RNG = False
32
  VOCABULARY = @seqio.SentencePieceVocabulary()
config.json CHANGED
@@ -1,8 +1,9 @@
1
  {
2
- "_name_or_path": "yhavinga/ul2_large_dutch_english",
3
  "architectures": [
4
  "T5ForConditionalGeneration"
5
  ],
 
6
  "d_ff": 2816,
7
  "d_kv": 64,
8
  "d_model": 1024,
@@ -26,7 +27,7 @@
26
  "relative_attention_num_buckets": 32,
27
  "tie_word_embeddings": false,
28
  "torch_dtype": "float32",
29
- "transformers_version": "4.24.0",
30
  "use_cache": true,
31
  "vocab_size": 32128
32
  }
1
  {
2
+ "_name_or_path": "hf/ul2-large-dutch-english",
3
  "architectures": [
4
  "T5ForConditionalGeneration"
5
  ],
6
+ "classifier_dropout": 0.0,
7
  "d_ff": 2816,
8
  "d_kv": 64,
9
  "d_model": 1024,
27
  "relative_attention_num_buckets": 32,
28
  "tie_word_embeddings": false,
29
  "torch_dtype": "float32",
30
+ "transformers_version": "4.35.0",
31
  "use_cache": true,
32
  "vocab_size": 32128
33
  }
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d4fe142609d6abec1d8c99e2dfe9d118b96b0dd2028283b74f680079da4327e8
3
- size 3132624407
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a77bb6fd4c648157c927ec672b72e04949ca3f695828e176c04f2442e3a7833
3
+ size 1632372682
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "decoder_start_token_id": 0,
4
+ "eos_token_id": 1,
5
+ "pad_token_id": 0,
6
+ "transformers_version": "4.35.0"
7
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:107a338f3dee570eafa21672271f84a71da0de9600ad03ebd78d9f940a4559c4
3
+ size 3132668808
train/events.out.tfevents.1673860280.t1v-n-e0ca6cd3-w-0.10042.0.v2 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:709882af3ea630a061dc2ae2dd36943167f026944e971a6619bb1f21c751ccd6
3
- size 5150450
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2c0d700c69afe0c0a873d63451c08ea30f08c6ec9456d41d8d4a5f31e949307
3
+ size 9937135
train/events.out.tfevents.1698317329.t1v-n-5ae5f195-w-0.26727.0.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf52561a559563b77e0f01ff8a70a5cc9a17d90fdcf3f78201d8343e4ff80d0b
3
+ size 1066848
train/events.out.tfevents.1698358025.t1v-n-5ae5f195-w-0.77780.0.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64c910f3937c1ec93baacf7eb54dfc85891b01344ed0a55711349f13d27478e3
3
+ size 15879626
train/events.out.tfevents.1698965229.t1v-n-5ae5f195-w-0.783722.0.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49ea465f4d3a7a1fea16be708b30ea5aad00c69057940c8bb1c5604e32a3a337
3
+ size 3978579
train/events.out.tfevents.1699608292.t1v-n-20277917-w-0.15955.0.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38a56bcaf345d286a33a9283dc140864a5c2680aac4a1fa7021dc9db15f4b8ce
3
+ size 6239
train/events.out.tfevents.1699696557.t1v-n-20277917-w-0.199146.0.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99673475a7912c95b0ed04d99a11dfe4e9d76d0e776df1b8e2a0e8fa1a0f9eaa
3
+ size 13750542
training_eval/mc4_en_nl_ul2_denoising/events.out.tfevents.1673860280.t1v-n-e0ca6cd3-w-0.10042.1.v2 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5746fd124e9eff6b3cd25fb95d7dab6c25c0dbb288e0c17a975ac68552595329
3
- size 227037
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f59a8a85db93f10201d68a78baff68f5abd821520750778d6d4301a0ab70ea70
3
+ size 439876
training_eval/mc4_en_nl_ul2_denoising/events.out.tfevents.1698317329.t1v-n-5ae5f195-w-0.26727.1.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5715b86a8345b059f0a3d1128db621530fea2d7165406aa9e31d01f63f2ab07e
3
+ size 45898
training_eval/mc4_en_nl_ul2_denoising/events.out.tfevents.1698358025.t1v-n-5ae5f195-w-0.77780.1.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d00750332b6115e4b47276ca4b89f2f3197253d33961b9c5308a8c9fbaabe27
3
+ size 702005
training_eval/mc4_en_nl_ul2_denoising/events.out.tfevents.1698965229.t1v-n-5ae5f195-w-0.783722.1.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d238f56ef25ff63769a1094dd2fe13d94356f21ed65df48c76c66705c5bd723a
3
+ size 176064
training_eval/mc4_en_nl_ul2_denoising/events.out.tfevents.1699608292.t1v-n-20277917-w-0.15955.1.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:447929fb483a1408be0451ef62a8c684588151666cc21b0a8847b1be5088f89a
3
+ size 78
training_eval/mc4_en_nl_ul2_denoising/events.out.tfevents.1699696557.t1v-n-20277917-w-0.199146.1.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86d90034bbc198eb909cc1180cd377e34419ca1057c7be22971aa66bf5f0ceb1
3
+ size 607916
training_eval/ul2_en_nl_mc4_nedd_wiki_news_mix_1/events.out.tfevents.1673860280.t1v-n-e0ca6cd3-w-0.10042.2.v2 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6094ab76a010e2e1f9d57d7c8b6f1378d11da15c5df448a33d57402a816340b9
3
- size 227037
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dda1358dbcb0156e3e5b1a6ea8357f02ea138b0b56d8e1d5fa2c07d3bfe1bea4
3
+ size 439876
training_eval/ul2_en_nl_mc4_nedd_wiki_news_mix_1/events.out.tfevents.1698317329.t1v-n-5ae5f195-w-0.26727.2.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a2c33419b829fa990b581a9b0231c667c11aa9af91a47c0504a4de459b182e5
3
+ size 45898
training_eval/ul2_en_nl_mc4_nedd_wiki_news_mix_1/events.out.tfevents.1698358025.t1v-n-5ae5f195-w-0.77780.2.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51843701c6d6991ebc37e2567d6e3fe191e7eead6c10730051ddae12409d7569
3
+ size 702005
training_eval/ul2_en_nl_mc4_nedd_wiki_news_mix_1/events.out.tfevents.1698965229.t1v-n-5ae5f195-w-0.783722.2.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:829b787b512e1e20636c4c37c4d839e863096e26e9ccf324272360cd2ce9a3fb
3
+ size 176064
training_eval/ul2_en_nl_mc4_nedd_wiki_news_mix_1/events.out.tfevents.1699608292.t1v-n-20277917-w-0.15955.2.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:447929fb483a1408be0451ef62a8c684588151666cc21b0a8847b1be5088f89a
3
+ size 78
training_eval/ul2_en_nl_mc4_nedd_wiki_news_mix_1/events.out.tfevents.1699696557.t1v-n-20277917-w-0.199146.2.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7bd872b5f4ba0bcd1e0b95cb38cab1cb9ee16a8f37c3299f68e2d8823dad200
3
+ size 607916