{
    "accum_grad": 1,
    "att_dropout_rate": 0.0,
    "att_unit": 512,
    "backend": "pytorch",
    "beta1": 0.9,
    "beta2": 0.999,
    "config2": null,
    "config3": null,
    "debugmode": 1,
    "dropout_rate": 0.0,
    "dump_hdf5_path": null,
    "early_stop_criterion": "validation/main/loss",
    "emb_dropout_rate": 0.0,
    "embed_unit": 128,
    "epoch": 50,
    "gradclip": 1.0,
    "head": 8,
    "layer": 16,
    "lr": 0.0008,
    "lr_cosine_total": 100000,
    "lr_cosine_warmup": 1000,
    "maxlen": 60,
    "model_module": "transformer",
    "n_vocab": 5049,
    "ngpu": 8,
    "opt": "adam",
    "patience": 0,
    "pos_enc": "none",
    "report_interval_iters": 100,
    "schedulers": [
        [
            "lr",
            "cosine"
        ]
    ],
    "seed": 1,
    "sortagrad": 0,
    "test_label": null,
    "tie_weights": false,
    "train_dtype": "float32",
    "unit": 2048,
    "verbose": 1,
    "weight_decay": 0.0
}