# Fine-tuning configuration.
#
# Reuses the data pipeline and trainer schedule from basic.jsonnet, loads the
# model and vocabulary from a previously trained archive, and overrides the
# optimizer settings.
#
# NOTE: each statement must stay on its own line. A `#` comment runs to the
# end of the line, so joining lines after a comment silently comments out the
# rest of the file.
local env = import "../env.jsonnet";
local base = import "basic.jsonnet";

# Location of the pretrained model; also the parent of its "vocabulary" dir.
# NOTE(review): env.str / env.json are project helpers; presumably they read
# an external setting and fall back to the given default -- confirm in
# env.jsonnet.
local pretrained_path = env.str("PRETRAINED_PATH", "cache/basic/best");
local lr = env.json("FT_LR", 5e-5);

# training
local cuda_devices = base.cuda_devices;
local num_devices = std.length(cuda_devices);
local single_device = num_devices == 1;
local multi_device = num_devices > 1;

{
  # Data pipeline: taken unchanged from the base configuration.
  dataset_reader: base.dataset_reader,
  train_data_path: base.train_data_path,
  validation_data_path: base.validation_data_path,
  test_data_path: base.test_data_path,
  datasets_for_vocab_creation: ["train"],
  data_loader: base.data_loader,
  validation_data_loader: base.validation_data_loader,

  # Model and vocabulary both come from the pretrained location.
  model: {
    type: "from_archive",
    archive_file: pretrained_path,
  },
  vocabulary: {
    type: "from_files",
    directory: pretrained_path + "/vocabulary",
  },

  trainer: {
    num_epochs: base.trainer.num_epochs,
    patience: base.trainer.patience,
    # A computed field name that evaluates to null (an `if` without `else`)
    # makes Jsonnet omit the field, so "cuda_device" is only emitted when
    # exactly one device is configured. The value is evaluated lazily, so
    # cuda_devices[0] is never touched for an empty list.
    [if single_device then "cuda_device"]: cuda_devices[0],
    validation_metric: "+arg-c_f",
    num_gradient_accumulation_steps: base.trainer.num_gradient_accumulation_steps,
    optimizer: {
      # NOTE(review): "transformer" looks like a project-registered optimizer
      # wrapper with per-component learning rates -- confirm its semantics.
      type: "transformer",
      base: {
        type: "adam",
        lr: lr,
      },
      embeddings_lr: 0.0,
      encoder_lr: 1e-5,
      pooler_lr: 1e-5,
      layer_fix: base.trainer.optimizer.layer_fix,
    },
  },

  # Emitted only when more than one device is configured.
  [if multi_device then "distributed"]: {
    "cuda_devices": cuda_devices,
  },
  # Emitted only in the single-device case; omitted for zero or multiple
  # devices.
  [if single_device then "evaluate_on_test"]: true,
}