{
  "dataset_reader": {
    "type": "multitask",
    "readers": {
      "sentiment": {
        "type": "sentiment_analysis",
        "token_indexers": {
          "transformer": {
            "type": "pretrained_transformer_mismatched",
            "max_length": 512,
            "model_name": "MLRS/BERTu"
          }
        }
      }
    }
  },
  "model": {
    "type": "multitask",
    "arg_name_mapping": {
      "backbone": {
        "tokens": "text",
        "words": "text"
      }
    },
    "backbone": {
      "type": "embedder_and_mask",
      "text_field_embedder": {
        "token_embedders": {
          "transformer": {
            "type": "pretrained_transformer_mismatched_with_dropout",
            "last_layer_only": false,
            "layer_dropout": 0.1,
            "max_length": 512,
            "model_name": "MLRS/BERTu",
            "tokenizer_kwargs": {},
            "train_parameters": true
          }
        }
      }
    },
    "heads": {
      "sentiment": {
        "type": "linear_classifier",
        "dropout": 0.5,
        "encoder": {
          "type": "pass_through",
          "input_dim": 768
        },
        "initializer": {
          "regexes": [
            [
              ".*projection.*weight",
              {
                "type": "xavier_uniform"
              }
            ],
            [
              ".*projection.*bias",
              {
                "type": "zero"
              }
            ],
            [
              ".*tag_bilinear.*weight",
              {
                "type": "xavier_uniform"
              }
            ],
            [
              ".*tag_bilinear.*bias",
              {
                "type": "zero"
              }
            ],
            [
              ".*weight_ih.*",
              {
                "type": "xavier_uniform"
              }
            ],
            [
              ".*weight_hh.*",
              {
                "type": "orthogonal"
              }
            ],
            [
              ".*bias_ih.*",
              {
                "type": "zero"
              }
            ],
            [
              ".*bias_hh.*",
              {
                "type": "lstm_hidden_bias"
              }
            ]
          ]
        }
      }
    }
  },
  "train_data_path": {
    "sentiment": "sentiment/mt/train.csv"
  },
  "validation_data_path": {
    "sentiment": "sentiment/mt/dev.csv"
  },
  "trainer": {
    "callbacks": [
      {
        "tensorboard_writer": {
          "should_log_learning_rate": true,
          "should_log_parameter_statistics": true
        },
        "type": "tensorboard"
      }
    ],
    "cuda_device": 0,
    "grad_norm": 5,
    "learning_rate_scheduler": {
      "type": "ulmfit_sqrt",
      "affected_group_count": 2,
      "decay_factor": 0.05,
      "discriminative_fine_tuning": true,
      "factor": 5,
      "gradual_unfreezing": true,
      "model_size": 1,
      "start_step": 19,
      "warmup_steps": 19
    },
    "num_epochs": 200,
    "optimizer": {
      "type": "huggingface_adamw",
      "betas": [0.9, 0.999],
      "correct_bias": false,
      "lr": 0.0001,
      "parameter_groups": [
        [
          [
            "text_field_embedder.*transformer_model.embeddings.*_embeddings.*",
            "text_field_embedder.*transformer_model.encoder.*.(key|query|value|dense).weight"
          ],
          {}
        ],
        [
          [
            "text_field_embedder.*transformer_model.embeddings.LayerNorm.*",
            "text_field_embedder.*transformer_model.encoder.*.output.LayerNorm.*",
            "text_field_embedder.*transformer_model.encoder.*.(key|query|value|dense).bias",
            "text_field_embedder.*transformer_model.pooler.dense.bias"
          ],
          {
            "weight_decay": 0
          }
        ],
        [
          [
            "text_field_embedder.*._scalar_mix.*",
            "text_field_embedder.*transformer_model.pooler.dense.weight",
            "_head_sentinel",
            "head_arc_feedforward._linear_layers.*.weight",
            "child_arc_feedforward._linear_layers.*.weight",
            "head_tag_feedforward._linear_layers.*.weight",
            "child_tag_feedforward._linear_layers.*.weight",
            "arc_attention._weight_matrix",
            "tag_bilinear.weight",
            "tag_projection_layer._module.weight",
            "crf",
            "linear.weight",
            "tagger_linear.weight"
          ],
          {}
        ],
        [
          [
            "head_arc_feedforward._linear_layers.*.bias",
            "child_arc_feedforward._linear_layers.*.bias",
            "head_tag_feedforward._linear_layers.*.bias",
            "child_tag_feedforward._linear_layers.*.bias",
            "arc_attention._bias",
            "tag_bilinear.bias",
            "tag_projection_layer._module.bias",
            "linear.bias",
            "tagger_linear.bias"
          ],
          {
            "weight_decay": 0
          }
        ]
      ],
      "weight_decay": 0.01
    },
    "patience": 20,
    "validation_metric": [
      "+sentiment_fscore"
    ]
  },
  "data_loader": {
    "type": "multitask",
    "scheduler": {
      "type": "unbalanced_homogeneous_roundrobin",
      "batch_size": 8,
      "dataset_sizes": {
        "sentiment": 595
      }
    },
    "shuffle": true
  },
  "distributed": {
    "cuda_devices": [0, 1, 2, 3]
  },
  "numpy_seed": 1537,
  "pytorch_seed": 153,
  "random_seed": 15370,
  "validation_data_loader": {
    "type": "multitask",
    "scheduler": {
      "type": "homogeneous_roundrobin",
      "batch_size": 8
    },
    "shuffle": true
  }
}