{ "acc_batch_size": -1, "adam_betas": "0.9,0.999", "architectures": [ "TransformerForNLI" ], "attention_probs_dropout_prob": 0.1, "bias_proj": "", "bias_v1": "", "bias_v2": "", "bos_token_id": 0, "clip": 1.0, "cls": "linear", "dir": "data/snli_1.0/", "dropout": 0.1, "enc": "transformer", "eos_token_id": 2, "epochs": 5, "freeze_transformer": 0, "gpuid": 1, "gradient_checkpointing": false, "hidden_act": "gelu", "hidden_dropout_prob": 0.1, "hidden_size": 768, "initializer_range": 0.02, "intermediate_size": 3072, "label_dict": "data/snli_1.0/snli.label.dict", "label_map_inv": { "0": "entailment", "1": "neutral", "2": "contradiction" }, "labels": [ "entailment", "neutral", "contradiction" ], "layer_norm_eps": 1e-05, "learning_rate": 3e-05, "load_file": "", "loss": "multiclass", "max_position_embeddings": 514, "model_type": "transformerfornli", "num_attention_heads": 12, "num_hidden_layers": 12, "num_label": 3, "optim": "adamw_fp16", "pad_token_id": 1, "param_init_type": "xavier_normal", "percent": 1.0, "print_every": 500, "save_file": "./models/robertabase_snli_seed1", "seed": 1, "train_data": "data/snli_1.0/snli.train.hdf5", "train_res": "", "transformer_type": "roberta-base", "type_vocab_size": 1, "val_data": "data/snli_1.0/snli.val.hdf5", "val_res": "", "vocab_size": 50265, "warmup": "linear", "warmup_perc": 0.1, "weight_decay": 0.01 }