naqanet / config.json
dirkgr's picture
Update repository
67b337b
{
"dataset_reader": {
"type": "drop",
"instance_format": "drop",
"passage_length_limit": 400,
"question_length_limit": 50,
"skip_when_all_empty": [
"passage_span",
"question_span",
"addition_subtraction",
"counting"
],
"token_indexers": {
"token_characters": {
"type": "characters",
"min_padding_length": 5
},
"tokens": {
"type": "single_id",
"lowercase_tokens": true
}
}
},
"model": {
"type": "naqanet",
"answering_abilities": [
"passage_span_extraction",
"question_span_extraction",
"addition_subtraction",
"counting"
],
"dropout_prob": 0.1,
"matrix_attention_layer": {
"type": "linear",
"combination": "x,y,x*y",
"tensor_1_dim": 128,
"tensor_2_dim": 128
},
"modeling_layer": {
"type": "qanet_encoder",
"attention_dropout_prob": 0,
"attention_projection_dim": 128,
"conv_kernel_size": 5,
"dropout_prob": 0.1,
"feedforward_hidden_dim": 128,
"hidden_dim": 128,
"input_dim": 128,
"layer_dropout_undecayed_prob": 0.1,
"num_attention_heads": 8,
"num_blocks": 6,
"num_convs_per_block": 2
},
"num_highway_layers": 2,
"phrase_layer": {
"type": "qanet_encoder",
"attention_dropout_prob": 0,
"attention_projection_dim": 128,
"conv_kernel_size": 7,
"dropout_prob": 0.1,
"feedforward_hidden_dim": 128,
"hidden_dim": 128,
"input_dim": 128,
"layer_dropout_undecayed_prob": 0.1,
"num_attention_heads": 8,
"num_blocks": 1,
"num_convs_per_block": 4
},
"regularizer": {
"regexes": [
[
".*",
{
"alpha": 1e-07,
"type": "l2"
}
]
]
},
"text_field_embedder": {
"token_embedders": {
"token_characters": {
"type": "character_encoding",
"embedding": {
"embedding_dim": 64
},
"encoder": {
"type": "cnn",
"embedding_dim": 64,
"ngram_filter_sizes": [
5
],
"num_filters": 200
}
},
"tokens": {
"type": "embedding",
"embedding_dim": 300,
"pretrained_file": "https://allennlp.s3.amazonaws.com/datasets/glove/glove.840B.300d.lower.converted.zip",
"trainable": false
}
}
}
},
"train_data_path": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/drop/drop_dataset.zip!drop_dataset/drop_dataset_train.json",
"validation_data_path": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/drop/drop_dataset.zip!drop_dataset/drop_dataset_dev.json",
"trainer": {
"callbacks": [
"tensorboard"
],
"grad_norm": 5,
"moving_average": {
"type": "exponential",
"decay": 0.9999
},
"num_epochs": 50,
"optimizer": {
"type": "adam",
"betas": [
0.8,
0.999
],
"eps": 1e-07,
"lr": 0.0005
},
"patience": 10,
"validation_metric": "+f1"
},
"vocabulary": {
"min_count": {
"token_characters": 200
},
"only_include_pretrained_words": true,
"pretrained_files": {
"tokens": "https://allennlp.s3.amazonaws.com/datasets/glove/glove.840B.300d.lower.converted.zip"
}
},
"data_loader": {
"batch_sampler": {
"type": "bucket",
"batch_size": 16
}
},
"validation_dataset_reader": {
"type": "drop",
"instance_format": "drop",
"passage_length_limit": 1000,
"question_length_limit": 100,
"skip_when_all_empty": [],
"token_indexers": {
"token_characters": {
"type": "characters",
"min_padding_length": 5
},
"tokens": {
"type": "single_id",
"lowercase_tokens": true
}
}
}
}