|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os |
|
|
|
import pytorch_lightning as pl |
|
from omegaconf import DictConfig |
|
|
|
from nemo.collections.nlp.data.text_normalization import constants |
|
from nemo.collections.nlp.models import DuplexDecoderModel, DuplexTaggerModel |
|
from nemo.utils import logging |
|
|
|
__all__ = ['TAGGER_MODEL', 'DECODER_MODEL', 'MODEL_NAMES', 'instantiate_model_and_trainer'] |
|
|
|
TAGGER_MODEL = 'tagger' |
|
DECODER_MODEL = 'decoder' |
|
MODEL_NAMES = [TAGGER_MODEL, DECODER_MODEL] |
|
|
|
|
|
def instantiate_model_and_trainer(cfg: DictConfig, model_name: str, do_training: bool):
    """Create a PyTorch Lightning trainer and the tagger or decoder model selected by ``model_name``.

    The model comes from one of three sources, depending on the
    ``{model_name}_pretrained_model`` entry of ``cfg``:

    * entry missing or empty: a new model is constructed from
      ``{model_name}_model`` and the trainer is passed to its constructor;
    * entry is an existing filesystem path: the model is restored from that
      path with ``restore_from``;
    * anything else: the entry is treated as a pretrained model name, checked
      against ``get_available_model_names()`` and loaded with
      ``from_pretrained``.

    Args:
        cfg: The config used to instantiate the model and the trainer. The
            keys ``{model_name}_trainer``, ``{model_name}_model`` and
            (optionally) ``{model_name}_pretrained_model`` are read, as well
            as ``lang`` and, when ``do_training`` is set, ``data.train_ds``
            and ``data.validation_ds``.
        model_name: Which model to instantiate; must be either TAGGER_MODEL
            or DECODER_MODEL.
        do_training: Whether the model will be trained. When True, the
            training and validation data are set up on the model.

    Returns:
        trainer: A PyTorch Lightning trainer
        model: A NLPModel that can either be a DuplexTaggerModel or a DuplexDecoderModel

    Raises:
        AssertionError: If ``model_name`` is not in MODEL_NAMES, or the
            resulting ``model.lang`` is not in ``constants.SUPPORTED_LANGS``.
        ValueError: If a pretrained model name is not among the available
            model names of the selected model class.
    """
    assert model_name in MODEL_NAMES

    is_tagger = model_name == TAGGER_MODEL
    is_decoder = model_name == DECODER_MODEL

    # Each model kind has its own config sections, keyed by the model name.
    trainer_kwargs = cfg.get(f'{model_name}_trainer')
    model_section = cfg.get(f'{model_name}_model')
    pretrained_cfg = cfg.get(f'{model_name}_pretrained_model', None)

    trainer = pl.Trainer(**trainer_kwargs)

    if pretrained_cfg and os.path.exists(pretrained_cfg):
        # The entry points at something on disk: restore from that path.
        logging.info(f'Restoring pretrained {model_name} model from {pretrained_cfg}')
        if is_tagger:
            model = DuplexTaggerModel.restore_from(pretrained_cfg)
        elif is_decoder:
            model = DuplexDecoderModel.restore_from(pretrained_cfg)
    elif pretrained_cfg:
        # Not a path on disk: treat the entry as a pretrained model name.
        logging.info(f'Loading pretrained model {pretrained_cfg}')
        if is_tagger:
            if pretrained_cfg not in DuplexTaggerModel.get_available_model_names():
                raise ValueError(
                    f'{pretrained_cfg} not in the list of available Tagger models. Select from {DuplexTaggerModel.list_available_models()}'
                )
            model = DuplexTaggerModel.from_pretrained(pretrained_cfg)
        elif is_decoder:
            if pretrained_cfg not in DuplexDecoderModel.get_available_model_names():
                raise ValueError(
                    f'{pretrained_cfg} not in the list of available Decoder models. Select from {DuplexDecoderModel.list_available_models()}'
                )
            model = DuplexDecoderModel.from_pretrained(pretrained_cfg)
    else:
        # No pretrained entry: build a new model attached to the trainer.
        logging.info(f'Initializing {model_name} model')
        if is_tagger:
            model = DuplexTaggerModel(model_section, trainer=trainer)
        elif is_decoder:
            model = DuplexDecoderModel(model_section, trainer=trainer)

    # Fall back to the language from the config when the model has none.
    if model.lang is None:
        model.lang = cfg.lang
    assert model.lang in constants.SUPPORTED_LANGS

    # cg setup applies only to an English decoder with use_cg enabled, and
    # only when the model does not already have a cg_normalizer.
    needs_cg_setup = (
        is_decoder
        and model_section.use_cg
        and cfg.lang == constants.ENGLISH
        and model.cg_normalizer is None
    )
    if needs_cg_setup:
        model.setup_cgs(model_section)

    if do_training:
        model.setup_training_data(train_data_config=cfg.data.train_ds)
        validation_ds = cfg.data.validation_ds
        # The decoder uses the multiple-validation setup; the tagger the single one.
        if is_decoder:
            model.setup_multiple_validation_data(val_data_config=validation_ds)
        else:
            model.setup_validation_data(val_data_config=validation_ds)

    logging.info(f'Model {model_name} -- Device {model.device}')
    return trainer, model
|
|