File size: 1,087 Bytes
6c1850b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
root = './rimeExtract_dataset/'

manual_seed = 1313
model_source = './bert-base-cantonese'
polyphonic_chars_path = root + 'POLYPHONIC_CHARS.txt'
window_size = 32
num_workers = 2
use_mask = True
use_conditional = True
param_conditional = {
    'bias': True,
    'char-linear': True,
    'pos-linear': False,
    'char+pos-second': True,
}

# for training
exp_name = '20241206_BERT_B_DescWS-Sec-cLin-B_POS_hkcancor_w03'
train_sent_path = root + 'train.sent'
train_lb_path = root + 'train.lb'
valid_sent_path = root + 'dev.sent'
valid_lb_path = root + 'dev.lb'
test_sent_path = root + 'test.sent'
test_lb_path = root + 'test.lb'
batch_size = 128
lr = 5e-5
val_interval = 200
num_iter = 13000
use_pos = True
param_pos = {
    'weight': 0.3,
    'pos_joint_training': True,
    # 'train_pos_path': root + 'train.pos',
    # 'valid_pos_path': root + 'dev.pos',
    # 'test_pos_path': root + 'test.pos',
    'train_pos_path': root + 'train_hkcancor.pos',
    'valid_pos_path': root + 'dev_hkcancor.pos',
    'test_pos_path': root + 'test_hkcancor.pos',
}