pere committed on
Commit
7934d62
1 Parent(s): 71d39c4

Commit from model create scripts

Browse files
config.gin CHANGED
@@ -31,7 +31,7 @@ MODEL_DIR = 'gs://nb-t5x-us-central2/finetuned/scandi3_3stammer_v2_large'
31
  OPTIMIZER = @adafactor.Adafactor()
32
  RANDOM_SEED = 0
33
  TASK_FEATURE_LENGTHS = {'inputs': 512, 'targets': 512}
34
- TRAIN_STEPS = 3100000
35
  USE_CACHED_TASKS = False
36
  USE_HARDWARE_RNG = False
37
  VOCABULARY = @seqio.SentencePieceVocabulary()
31
  OPTIMIZER = @adafactor.Adafactor()
32
  RANDOM_SEED = 0
33
  TASK_FEATURE_LENGTHS = {'inputs': 512, 'targets': 512}
34
+ TRAIN_STEPS = 3300000
35
  USE_CACHED_TASKS = False
36
  USE_HARDWARE_RNG = False
37
  VOCABULARY = @seqio.SentencePieceVocabulary()
config.json CHANGED
@@ -13,6 +13,7 @@
13
  "initializer_factor": 1.0,
14
  "is_encoder_decoder": true,
15
  "layer_norm_epsilon": 1e-06,
 
16
  "model_type": "t5",
17
  "num_decoder_layers": 24,
18
  "num_heads": 16,
@@ -21,12 +22,6 @@
21
  "pad_token_id": 0,
22
  "relative_attention_max_distance": 128,
23
  "relative_attention_num_buckets": 32,
24
- "tie_word_embeddings": false,
25
- "tokenizer_class": "T5Tokenizer",
26
- "torch_dtype": "float32",
27
- "transformers_version": "4.19.2",
28
- "use_cache": true,
29
- "max_length": 512,
30
  "task_specific_params": {
31
  "text-generation": {
32
  "max_length": 512
@@ -34,6 +29,11 @@
34
  "translation": {
35
  "max_length": 512
36
  }
37
- },
 
 
 
 
 
38
  "vocab_size": 250112
39
  }
13
  "initializer_factor": 1.0,
14
  "is_encoder_decoder": true,
15
  "layer_norm_epsilon": 1e-06,
16
+ "max_length": 512,
17
  "model_type": "t5",
18
  "num_decoder_layers": 24,
19
  "num_heads": 16,
22
  "pad_token_id": 0,
23
  "relative_attention_max_distance": 128,
24
  "relative_attention_num_buckets": 32,
 
 
 
 
 
 
25
  "task_specific_params": {
26
  "text-generation": {
27
  "max_length": 512
29
  "translation": {
30
  "max_length": 512
31
  }
32
+ },
33
+ "tie_word_embeddings": false,
34
+ "tokenizer_class": "T5Tokenizer",
35
+ "torch_dtype": "float32",
36
+ "transformers_version": "4.19.2",
37
+ "use_cache": true,
38
  "vocab_size": 250112
39
  }
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b129583284511c0fc5cb80f0f3da33a5559625ff1a16ad5734d188906e4c6e58
3
  size 4918349339
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:952384b0efdcd4b18b6882eaf7c4b15be2e902d3421059b6bec0f143751837c1
3
  size 4918349339
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d3c0aaa35ba972fbbad7b53de5b5af8ecd8c040f99cd132a49a99cee3c59c4b9
3
  size 4918507641
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f42c4d1b64ee95336f0dc5fb01f822e65666c394ab291c24a7e8c497f47234f2
3
  size 4918507641
train/events.out.tfevents.1667421611.t1v-n-b052f6bf-w-1.820156.0.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:544b309881a8f653e93657c4eb6aa41d856f748d95c8460793389d7b17ed731e
3
+ size 171445
training_eval/translate/events.out.tfevents.1667421612.t1v-n-b052f6bf-w-1.820156.1.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dccf5d96f93ab56d85473d58a9f8e29c8e1f82f7f1a04ad3263aa61ae4c4ee02
3
+ size 138967