File size: 1,300 Bytes
72546dd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# Top-level settings: the task to run and the shared cache directory.
[core/auto]
# Value matches the [core/auto/supervised_task] section further down;
# presumably this selects which task section is used -- confirm with the consumer.
task_type = core/auto/supervised_task
# Interpolated into from_ckpt_dir, to_ckpt_dir and output_path in
# [core/auto/supervised_task].
cache_dir = ./cache

# model
# Settings for the MASS model.
# NOTE(review): the task section names the model core/model/mass_for_generation;
# presumably that model reads its options from this section -- confirm.
[core/model/mass]
# The same value is repeated as pretrained_name in [core/process/mass].
pretrained_name = mass-base-uncased
# Generation options (presumably decoding parameters for beam search -- confirm).
num_beams = 5
no_repeat_ngram_size = 0
# Also set to 15 in [core/process/mass]; presumably the two should stay in sync.
max_gen_seq_length = 15
repetition_penalty = 1.0

# dataset
# Dataset source; presumably shared by the train/dev/test sections below -- confirm.
[core/dataset]
# data columns: id, num, query, doc, label, score
# Of these columns, only query and doc are referenced by the preprocess_funcs
# entries in the split sections of this file.
data_name = fuliucansheng/data_for_test

# Train split: one preprocessing call that receives both the query and doc columns.
[core/dataset/train]
# Value is a list literal of call-style strings: function path plus column names.
preprocess_funcs = ['core/process/mass_for_generation(query, doc)']

# Dev split: query and doc are handled by two separate preprocessing calls.
[core/dataset/dev]
# mass_for_tokens receives query; mass_for_next_tokens receives doc
# (presumably input tokens and target tokens respectively -- confirm).
preprocess_funcs = ['core/process/mass_for_tokens(query)', 'core/process/mass_for_next_tokens(doc)']

# Test split: only the query column is preprocessed; doc is not referenced here.
[core/dataset/test]
preprocess_funcs = ['core/process/mass_for_tokens(query)']

# process
# Settings for MASS preprocessing (presumably consumed by the
# core/process/mass_for_* functions named in preprocess_funcs -- confirm).
[core/process/mass]
# Same value as pretrained_name in [core/model/mass].
pretrained_name = mass-base-uncased
# Presumably the maximum length of the encoded input sequence -- confirm units.
max_seq_length = 24
# Same value as max_gen_seq_length in [core/model/mass].
max_gen_seq_length = 15

# task
# Component wiring and run settings for the task named by task_type in [core/auto].
[core/auto/supervised_task]
model = core/model/mass_for_generation
# NOTE(review): no [core/optim/adam] or [core/dataset/auto] section appears in the
# visible part of this file; presumably defaults apply -- confirm.
optim = core/optim/adam
dataset = core/dataset/auto
loss_fn = core/loss/lm
# score_fn is also the first entry of monitor_fns.
score_fn = core/score/bleu
monitor_fns = ['core/score/bleu', 'core/score/rouge1', 'core/score/rouge2', 'core/score/rougel']
# Comma-separated column names (presumably the columns written to output_path -- confirm).
output_header = query,doc
post_process_fn = partial(core/process/mass_for_decode)

# Mixed-precision setting (O1 looks like an NVIDIA Apex AMP opt level -- confirm).
opt_fp16 = O1
# Both checkpoint directories and the output file resolve under cache_dir
# from [core/auto] via interpolation.
from_ckpt_dir = ${core/auto:cache_dir}
to_ckpt_dir = ${core/auto:cache_dir}
output_path = ${core/auto:cache_dir}/output.txt
# Batch sizes for the train, dev and test phases.
train_batch_size = 128
dev_batch_size = 128
test_batch_size = 256