pdn2_v08_nkjp_large / train_args.json
czuk's picture
Upload model 0.8
d11a210
{
"data_train": [
"data/nkjp-nested-ttt/train.txt",
"data/nkjp-nested-ttt/valid.txt",
"data/nkjp-nested-ttt/test.txt"
],
"data_tune": [
"data/nkjp-nested-ttt/valid.txt"
],
"data_test": [
"data/nkjp-nested-ttt/test.txt"
],
"pretrained_path": "allegro/herbert-large-cased",
"output_dir": "../poldeepner2_models/dev/nkjp_full/model_nkjp_full_union_256_101_v_003",
"cache_dir": "",
"device": "cuda:0",
"max_seq_length": 256,
"do_eval": false,
"do_lower_case": false,
"train_batch_size": 16,
"eval_batch_size": 16,
"learning_rate": 5e-06,
"num_train_epochs": 20,
"warmup_proportion": 0.0,
"weight_decay": 0.01,
"adam_epsilon": 1e-08,
"max_grad_norm": 1.0,
"seed": 101,
"gradient_accumulation_steps": 1,
"fp16": false,
"fp16_opt_level": "O1",
"loss_scale": 0,
"dropout": 0.2,
"freeze_model": false,
"epoch_save_model": true,
"sequence_generator": "union",
"sequence_generator_for_eval": "context-window",
"training_mix": false,
"wandb": "nkjp_full",
"hidden_size": 1024
}