liangyuxin committed on
Commit
39f7e3a
1 Parent(s): 8cade5d

add configs

Browse files
config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "encoder_model_path": "/encoder/",
3
+ "decoder_model_path": "/decoder/",
4
+ "sentencepiece_model_path":"/decoder/cog-pretrain.model",
5
+ "latent_size":256,
6
+ "seed": 42,
7
+ "ratio_increase": 0.5,
8
+ "ratio_zero": 0.05,
9
+ "dim_target_kl": 1.0,
10
+ "length_weighted_loss": true,
11
+ "beta_start": 0.0001,
12
+ "beta_m": 0.1,
13
+ "lambda_adv": 0.0,
14
+ "fb_mode": 1,
15
+ "mlm_probability": 0.15,
16
+ "drop_prob": 0.0,
17
+ "sub_prob": 0.0,
18
+ "shuffle_dist": 0,
19
+ "zeta": 0.0,
20
+ "learning_rate": 5e-05,
21
+ "weight_decay": 0.1,
22
+ "adam_epsilon": 1e-08,
23
+ "max_grad_norm": 1.0,
24
+ "warmup": 0.01,
25
+ "temperature": 1.0,
26
+ "top_k": 0,
27
+ "repetition_penalty": 2.0,
28
+ "top_p": 0.9,
29
+ "std_scale": 1.5,
30
+ "max_out_length": 100,
31
+ "logger": true,
32
+ "checkpoint_callback": null,
33
+ "enable_checkpointing": true,
34
+ "process_position": 0
35
+ }
decoder/cog-pretrain.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ea6f4164152bc58d23e24e48f7bf4187aad72a32e97ec4b3acc832fe183cbc2
3
+ size 1021864
decoder/cog-pretrain.vocab ADDED
The diff for this file is too large to render. See raw diff
 
decoder/config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "num_layers":32,
3
+ "vocab_size":50048,
4
+ "hidden_size":1600,
5
+ "num_attention_heads":25,
6
+ "embedding_dropout_prob":0.1,
7
+ "attention_dropout_prob":0.1,
8
+ "output_dropout_prob":0.1,
9
+ "max_sequence_length":512,
10
+ "max_memory_length":512,
11
+ "latent_size":256,
12
+ "checkpoint_activations":false,
13
+ "checkpoint_num_layers":1,
14
+ "parallel_output":true,
15
+ "relative_encoding":true
16
+ }
encoder/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForMaskedLM"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "directionality": "bidi",
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 768,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 3072,
12
+ "layer_norm_eps": 1e-12,
13
+ "max_position_embeddings": 512,
14
+ "model_type": "bert",
15
+ "num_attention_heads": 12,
16
+ "num_hidden_layers": 12,
17
+ "pad_token_id": 0,
18
+ "pooler_fc_size": 768,
19
+ "pooler_num_attention_heads": 12,
20
+ "pooler_num_fc_layers": 3,
21
+ "pooler_size_per_head": 128,
22
+ "pooler_type": "first_token_transform",
23
+ "type_vocab_size": 2,
24
+ "vocab_size": 21128
25
+ }
encoder/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
encoder/tokenizer_config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "do_lower_case": false
3
+ }
encoder/vocab.txt ADDED
The diff for this file is too large to render. See raw diff