Commit from model create scripts
Browse files
config.gin
CHANGED
@@ -31,7 +31,7 @@ MODEL_DIR = 'gs://nb-t5x-us-central2/finetuned/scandi3_3stammer_v2_large'
|
|
31 |
OPTIMIZER = @adafactor.Adafactor()
|
32 |
RANDOM_SEED = 0
|
33 |
TASK_FEATURE_LENGTHS = {'inputs': 512, 'targets': 512}
|
34 |
-
TRAIN_STEPS =
|
35 |
USE_CACHED_TASKS = False
|
36 |
USE_HARDWARE_RNG = False
|
37 |
VOCABULARY = @seqio.SentencePieceVocabulary()
|
31 |
OPTIMIZER = @adafactor.Adafactor()
|
32 |
RANDOM_SEED = 0
|
33 |
TASK_FEATURE_LENGTHS = {'inputs': 512, 'targets': 512}
|
34 |
+
TRAIN_STEPS = 3300000
|
35 |
USE_CACHED_TASKS = False
|
36 |
USE_HARDWARE_RNG = False
|
37 |
VOCABULARY = @seqio.SentencePieceVocabulary()
|
config.json
CHANGED
@@ -13,6 +13,7 @@
|
|
13 |
"initializer_factor": 1.0,
|
14 |
"is_encoder_decoder": true,
|
15 |
"layer_norm_epsilon": 1e-06,
|
|
|
16 |
"model_type": "t5",
|
17 |
"num_decoder_layers": 24,
|
18 |
"num_heads": 16,
|
@@ -21,12 +22,6 @@
|
|
21 |
"pad_token_id": 0,
|
22 |
"relative_attention_max_distance": 128,
|
23 |
"relative_attention_num_buckets": 32,
|
24 |
-
"tie_word_embeddings": false,
|
25 |
-
"tokenizer_class": "T5Tokenizer",
|
26 |
-
"torch_dtype": "float32",
|
27 |
-
"transformers_version": "4.19.2",
|
28 |
-
"use_cache": true,
|
29 |
-
"max_length": 512,
|
30 |
"task_specific_params": {
|
31 |
"text-generation": {
|
32 |
"max_length": 512
|
@@ -34,6 +29,11 @@
|
|
34 |
"translation": {
|
35 |
"max_length": 512
|
36 |
}
|
37 |
-
|
|
|
|
|
|
|
|
|
|
|
38 |
"vocab_size": 250112
|
39 |
}
|
13 |
"initializer_factor": 1.0,
|
14 |
"is_encoder_decoder": true,
|
15 |
"layer_norm_epsilon": 1e-06,
|
16 |
+
"max_length": 512,
|
17 |
"model_type": "t5",
|
18 |
"num_decoder_layers": 24,
|
19 |
"num_heads": 16,
|
22 |
"pad_token_id": 0,
|
23 |
"relative_attention_max_distance": 128,
|
24 |
"relative_attention_num_buckets": 32,
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
"task_specific_params": {
|
26 |
"text-generation": {
|
27 |
"max_length": 512
|
29 |
"translation": {
|
30 |
"max_length": 512
|
31 |
}
|
32 |
+
},
|
33 |
+
"tie_word_embeddings": false,
|
34 |
+
"tokenizer_class": "T5Tokenizer",
|
35 |
+
"torch_dtype": "float32",
|
36 |
+
"transformers_version": "4.19.2",
|
37 |
+
"use_cache": true,
|
38 |
"vocab_size": 250112
|
39 |
}
|
flax_model.msgpack
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4918349339
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:952384b0efdcd4b18b6882eaf7c4b15be2e902d3421059b6bec0f143751837c1
|
3 |
size 4918349339
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4918507641
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f42c4d1b64ee95336f0dc5fb01f822e65666c394ab291c24a7e8c497f47234f2
|
3 |
size 4918507641
|
train/events.out.tfevents.1667421611.t1v-n-b052f6bf-w-1.820156.0.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:544b309881a8f653e93657c4eb6aa41d856f748d95c8460793389d7b17ed731e
|
3 |
+
size 171445
|
training_eval/translate/events.out.tfevents.1667421612.t1v-n-b052f6bf-w-1.820156.1.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dccf5d96f93ab56d85473d58a9f8e29c8e1f82f7f1a04ad3263aa61ae4c4ee02
|
3 |
+
size 138967
|