# Format used throughout this file: one `KEY VALUE` pair, or a bare `KEY`
# flag, per line. Lines starting with `#` are comments.

##################
# Trainer settings
##################

MODEL MeetingNet_Transformer
TASK HMNet
CRITERION MLECriterion

SEED 1033

MAX_NUM_EPOCHS 20
EVAL_PER_UPDATE_NUM 10
UPDATES_PER_EPOCH 20

# The actual learning rate will be multiplied with the number of GPUs
OPTIMIZER RAdam
START_LEARNING_RATE 1e-3
LR_SCHEDULER LnrWrmpInvSqRtDcyScheduler
WARMUP_STEPS 16000
WARMUP_INIT_LR 1e-4
WARMUP_END_LR 1e-3

# The actual start learning rate equals START_LEARNING_RATE * GRADIENT_ACCUMULATE_STEP
# Model will be updated after every MINI_BATCH * GRADIENT_ACCUMULATE_STEP samples
GRADIENT_ACCUMULATE_STEP 5

GRAD_CLIPPING 2

##################
# Task settings
##################

# This is the relative path to the directory where this conf file is located
USE_REL_DATA_PATH
TRAIN_FILE ../ExampleRawData/meeting_summarization/AMI_proprec/train_ami.json
DEV_FILE ../ExampleRawData/meeting_summarization/AMI_proprec/valid_ami.json
TEST_FILE ../ExampleRawData/meeting_summarization/AMI_proprec/test_ami.json
ROLE_DICT_FILE ../ExampleRawData/meeting_summarization/role_dict_ext.json

MINI_BATCH 1
MAX_PADDING_RATIO 1
BATCH_READ_AHEAD 10
DOC_SHUFFLE_BUF_SIZE 10
SAMPLE_SHUFFLE_BUFFER_SIZE 10
BATCH_SHUFFLE_BUFFER_SIZE 10

MAX_TRANSCRIPT_WORD 8300
# MAX_SENT_LEN 30
MAX_SENT_LEN 12
# MAX_SENT_NUM 300
MAX_SENT_NUM 60

##################
# Model settings
##################

DROPOUT 0.1
VOCAB_DIM 512
ROLE_SIZE 32
ROLE_DIM 16
POS_DIM 16
ENT_DIM 16

USE_ROLE
USE_POSENT

USE_BOS_TOKEN
USE_EOS_TOKEN

TRANSFORMER_EMBED_DROPOUT 0.1
TRANSFORMER_RESIDUAL_DROPOUT 0.1
TRANSFORMER_ATTENTION_DROPOUT 0.1
TRANSFORMER_LAYER 6
TRANSFORMER_HEAD 8
TRANSFORMER_POS_DISCOUNT 80

PRE_TOKENIZER TransfoXLTokenizer
PRE_TOKENIZER_PATH ../../../third_party/HMNet/ExampleInitModel/transfo-xl-wt103
PYLEARN_MODEL ../../../third_party/HMNet/ExampleInitModel/AMI-finetuned
# e.g. PYLEARN_MODEL conf_hmnet_AMI_conf~/run_1/11600

##################
# Tokenizer settings
##################

EXTRA_IDS 1000

##################
# Decoding settings
##################

BEAM_WIDTH 6
EVAL_TOKENIZED
EVAL_LOWERCASE
# MAX_GEN_LENGTH 300
MAX_GEN_LENGTH 60
MIN_GEN_LENGTH 10
NO_REPEAT_NGRAM_SIZE 3