{
  "acc_batch_size": -1,
  "adam_betas": "0.9,0.999",
  "architectures": [
    "TransformerForNLI"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bias_proj": "",
  "bias_v1": "",
  "bias_v2": "",
  "bos_token_id": 0,
  "clip": 1.0,
  "cls": "linear",
  "dir": "data/snli_1.0/",
  "dropout": 0.1,
  "enc": "transformer",
  "eos_token_id": 2,
  "epochs": 5,
  "freeze_transformer": 0,
  "gpuid": 1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label_dict": "data/snli_1.0/snli.label.dict",
  "label_map_inv": {
    "0": "entailment",
    "1": "neutral",
    "2": "contradiction"
  },
  "labels": [
    "entailment",
    "neutral",
    "contradiction"
  ],
  "layer_norm_eps": 1e-05,
  "learning_rate": 3e-05,
  "load_file": "",
  "loss": "multiclass",
  "max_position_embeddings": 514,
  "model_type": "transformerfornli",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "num_label": 3,
  "optim": "adamw_fp16",
  "pad_token_id": 1,
  "param_init_type": "xavier_normal",
  "percent": 1.0,
  "print_every": 500,
  "save_file": "./models/robertabase_snli_seed1",
  "seed": 1,
  "train_data": "data/snli_1.0/snli.train.hdf5",
  "train_res": "",
  "transformer_type": "roberta-base",
  "type_vocab_size": 1,
  "val_data": "data/snli_1.0/snli.val.hdf5",
  "val_res": "",
  "vocab_size": 50265,
  "warmup": "linear",
  "warmup_perc": 0.1,
  "weight_decay": 0.01
}