ssa-perin / config /params.py
larkkin's picture
Add code
991f07c
import yaml
class Params:
def __init__(self):
self.graph_mode = "sequential" # possibilities: {sequential, node-centric, edge-labeled}
self.accumulation_steps = 1 # number of gradient accumulation steps for achieving a bigger batch_size
self.activation = "relu" # transformer (decoder) activation function, supported values: {'relu', 'gelu', 'sigmoid', 'mish'}
self.predict_intensity = False
self.batch_size = 32 # batch size (further divided into multiple GPUs)
self.beta_2 = 0.98 # beta 2 parameter for Adam(W) optimizer
self.blank_weight = 1.0 # weight of cross-entropy loss for predicting an empty label
self.char_embedding = True # use character embedding in addition to bert
self.char_embedding_size = 128 # dimension of the character embedding layer in the character embedding module
self.decoder_delay_steps = 0 # number of initial steps with frozen decoder
self.decoder_learning_rate = 6e-4 # initial decoder learning rate
self.decoder_weight_decay = 1.2e-6 # amount of weight decay
self.dropout_anchor = 0.5 # dropout at the last layer of anchor classifier
self.dropout_edge_label = 0.5 # dropout at the last layer of edge label classifier
self.dropout_edge_presence = 0.5 # dropout at the last layer of edge presence classifier
self.dropout_label = 0.5 # dropout at the last layer of label classifier
self.dropout_transformer = 0.5 # dropout for the transformer layers (decoder)
self.dropout_transformer_attention = 0.1 # dropout for the transformer's attention (decoder)
self.dropout_word = 0.1 # probability of dropping out a whole word from the encoder (in favour of char embedding)
self.encoder = "xlm-roberta-base" # pretrained encoder model
self.encoder_delay_steps = 2000 # number of initial steps with frozen XLM-R
self.encoder_freeze_embedding = True # freeze the first embedding layer in XLM-R
self.encoder_learning_rate = 6e-5 # initial encoder learning rate
self.encoder_weight_decay = 1e-2 # amount of weight decay
self.lr_decay_multiplier = 100
self.epochs = 100 # number of epochs for train
self.focal = True # use focal loss for the label prediction
self.freeze_bert = False # use focal loss for the label prediction
self.group_ops = False # group 'opN' edge labels into one
self.hidden_size_ff = 4 * 768 # hidden size of the transformer feed-forward submodule
self.hidden_size_anchor = 128 # hidden size anchor biaffine layer
self.hidden_size_edge_label = 256 # hidden size for edge label biaffine layer
self.hidden_size_edge_presence = 512 # hidden size for edge label biaffine layer
self.layerwise_lr_decay = 1.0 # layerwise decay of learning rate in the encoder
self.n_attention_heads = 8 # number of attention heads in the decoding transformer
self.n_layers = 3 # number of layers in the decoder
self.query_length = 4 # number of queries genereted for each word on the input
self.pre_norm = True # use pre-normalized version of the transformer (as in Transformers without Tears)
self.warmup_steps = 6000 # number of the warm-up steps for the inverse_sqrt scheduler
def init_data_paths(self):
directory_1 = {
"sequential": "node_centric_mrp",
"node-centric": "node_centric_mrp",
"labeled-edge": "labeled_edge_mrp"
}[self.graph_mode]
directory_2 = {
("darmstadt", "en"): "darmstadt_unis",
("mpqa", "en"): "mpqa",
("multibooked", "ca"): "multibooked_ca",
("multibooked", "eu"): "multibooked_eu",
("norec", "no"): "norec",
("opener", "en"): "opener_en",
("opener", "es"): "opener_es",
}[(self.framework, self.language)]
self.training_data = f"{self.data_directory}/{directory_1}/{directory_2}/train.mrp"
self.validation_data = f"{self.data_directory}/{directory_1}/{directory_2}/dev.mrp"
self.test_data = f"{self.data_directory}/{directory_1}/{directory_2}/test.mrp"
self.raw_training_data = f"{self.data_directory}/raw/{directory_2}/train.json"
self.raw_validation_data = f"{self.data_directory}/raw/{directory_2}/dev.json"
return self
def load_state_dict(self, d):
for k, v in d.items():
setattr(self, k, v)
return self
def state_dict(self):
members = [attr for attr in dir(self) if not callable(getattr(self, attr)) and not attr.startswith("__")]
return {k: self.__dict__[k] for k in members}
def load(self, args):
with open(args.config, "r", encoding="utf-8") as f:
params = yaml.safe_load(f)
self.load_state_dict(params)
self.init_data_paths()
def save(self, json_path):
with open(json_path, "w", encoding="utf-8") as f:
d = self.state_dict()
yaml.dump(d, f)