fuliucansheng
committed on
Commit
•
72546dd
1
Parent(s):
99f5b49
mass config
Browse files
- mass-base-uncased-config.json +2 -2
- mass_for_generation.ini +50 -0
mass-base-uncased-config.json
CHANGED
@@ -9,7 +9,7 @@
|
|
9 |
"decoder_embed_dim": 768,
|
10 |
"decoder_ffn_embed_dim": 3072,
|
11 |
"decoder_layers": 6,
|
12 |
-
"decoder_attention_heads":
|
13 |
"decoder_normalize_before": 0,
|
14 |
"attention_dropout": 0.1,
|
15 |
"activation_dropout": 0.1,
|
@@ -24,7 +24,7 @@
|
|
24 |
"decoder_output_dim": 768,
|
25 |
"decoder_input_dim": 768,
|
26 |
"no_scale_embedding": 0,
|
27 |
-
"adaptive_input":
|
28 |
"tie_adaptive_weights": 0,
|
29 |
"layernorm_embedding": 1,
|
30 |
"encoder_layerdrop": 0,
|
|
|
9 |
"decoder_embed_dim": 768,
|
10 |
"decoder_ffn_embed_dim": 3072,
|
11 |
"decoder_layers": 6,
|
12 |
+
"decoder_attention_heads": 12,
|
13 |
"decoder_normalize_before": 0,
|
14 |
"attention_dropout": 0.1,
|
15 |
"activation_dropout": 0.1,
|
|
|
24 |
"decoder_output_dim": 768,
|
25 |
"decoder_input_dim": 768,
|
26 |
"no_scale_embedding": 0,
|
27 |
+
"adaptive_input": 0,
|
28 |
"tie_adaptive_weights": 0,
|
29 |
"layernorm_embedding": 1,
|
30 |
"encoder_layerdrop": 0,
|
mass_for_generation.ini
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[core/auto]
|
2 |
+
task_type = core/auto/supervised_task
|
3 |
+
cache_dir = ./cache
|
4 |
+
|
5 |
+
# model
|
6 |
+
[core/model/mass]
|
7 |
+
pretrained_name = mass-base-uncased
|
8 |
+
num_beams = 5
|
9 |
+
no_repeat_ngram_size = 0
|
10 |
+
max_gen_seq_length = 15
|
11 |
+
repetition_penalty = 1.0
|
12 |
+
|
13 |
+
# dataset
|
14 |
+
[core/dataset]
|
15 |
+
# data columns: id, num, query, doc, label, score
|
16 |
+
data_name = fuliucansheng/data_for_test
|
17 |
+
|
18 |
+
[core/dataset/train]
|
19 |
+
preprocess_funcs = ['core/process/mass_for_generation(query, doc)']
|
20 |
+
|
21 |
+
[core/dataset/dev]
|
22 |
+
preprocess_funcs = ['core/process/mass_for_tokens(query)', 'core/process/mass_for_next_tokens(doc)']
|
23 |
+
|
24 |
+
[core/dataset/test]
|
25 |
+
preprocess_funcs = ['core/process/mass_for_tokens(query)']
|
26 |
+
|
27 |
+
# process
|
28 |
+
[core/process/mass]
|
29 |
+
pretrained_name = mass-base-uncased
|
30 |
+
max_seq_length = 24
|
31 |
+
max_gen_seq_length = 15
|
32 |
+
|
33 |
+
# task
|
34 |
+
[core/auto/supervised_task]
|
35 |
+
model = core/model/mass_for_generation
|
36 |
+
optim = core/optim/adam
|
37 |
+
dataset = core/dataset/auto
|
38 |
+
loss_fn = core/loss/lm
|
39 |
+
score_fn = core/score/bleu
|
40 |
+
monitor_fns = ['core/score/bleu', 'core/score/rouge1', 'core/score/rouge2', 'core/score/rougel']
|
41 |
+
output_header = query,doc
|
42 |
+
post_process_fn = partial(core/process/mass_for_decode)
|
43 |
+
|
44 |
+
opt_fp16 = O1
|
45 |
+
from_ckpt_dir = ${core/auto:cache_dir}
|
46 |
+
to_ckpt_dir = ${core/auto:cache_dir}
|
47 |
+
output_path = ${core/auto:cache_dir}/output.txt
|
48 |
+
train_batch_size = 128
|
49 |
+
dev_batch_size = 128
|
50 |
+
test_batch_size = 256
|