# src/training_pipeline.py
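"""Hydra-driven training pipeline.

Instantiates the datamodule, model, callbacks, loggers, and trainer from the
Hydra config, optionally trains and tests the model, and returns a metric
score for hyperparameter optimization.
"""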
import os
from typing import List, Optional

import hydra
from omegaconf import DictConfig
from pytorch_lightning import (
    Callback,
    LightningDataModule,
    LightningModule,
    Trainer,
    seed_everything,
)
from pytorch_lightning.loggers import LightningLoggerBase

from src import utils

log = utils.get_logger(__name__)


def train(config: DictConfig) -> Optional[float]:
    """Contains the training pipeline.

    Can additionally evaluate the model on a test set, using the best weights
    achieved during training.

    Args:
        config (DictConfig): Configuration composed by Hydra.

    Returns:
        Optional[float]: Metric score for hyperparameter optimization.
    """
    # Set seed for random number generators in pytorch, numpy and python.random
    if config.get("seed"):
        seed_everything(config.seed, workers=True)
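
    # Hydra changes the working directory for each run, so relative checkpoint
    # paths given on the command line must be resolved against the original
    # launch directory.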
    # Convert relative ckpt path to absolute path if necessary
    ckpt_path = config.trainer.get("resume_from_checkpoint")
    if ckpt_path and not os.path.isabs(ckpt_path):
        config.trainer.resume_from_checkpoint = os.path.join(
            hydra.utils.get_original_cwd(), ckpt_path
        )

    # Init lightning datamodule
    log.info(f"Instantiating datamodule <{config.datamodule._target_}>")
    datamodule: LightningDataModule = hydra.utils.instantiate(config.datamodule)

    # Init lightning model
    log.info(f"Instantiating model <{config.model._target_}>")
    model: LightningModule = hydra.utils.instantiate(config.model)

    # Init lightning callbacks
    callbacks: List[Callback] = []
    if "callbacks" in config:
        for _, cb_conf in config.callbacks.items():
            if "_target_" in cb_conf:
                log.info(f"Instantiating callback <{cb_conf._target_}>")
                callbacks.append(hydra.utils.instantiate(cb_conf))

    # Init lightning loggers
    logger: List[LightningLoggerBase] = []
    if "logger" in config:
        for _, lg_conf in config.logger.items():
            if "_target_" in lg_conf:
                log.info(f"Instantiating logger <{lg_conf._target_}>")
                logger.append(hydra.utils.instantiate(lg_conf))

    # Init lightning trainer
    log.info(f"Instantiating trainer <{config.trainer._target_}>")
    trainer: Trainer = hydra.utils.instantiate(
        config.trainer, callbacks=callbacks, logger=logger, _convert_="partial"
    )
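    # Note: _convert_="partial" makes Hydra convert OmegaConf containers to
    # native Python dicts/lists (except structured configs) before calling the
    # target, so the Trainer receives plain lists of callbacks and loggers.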

    # Send some parameters from config to all lightning loggers
    log.info("Logging hyperparameters!")
    utils.log_hyperparameters(
        config=config,
        model=model,
        datamodule=datamodule,
        trainer=trainer,
        callbacks=callbacks,
        logger=logger,
    )

    # Train the model
    if config.get("train"):
        log.info("Starting training!")
        trainer.fit(model=model, datamodule=datamodule)
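
    # Metrics logged during fit/validation end up in trainer.callback_metrics,
    # which is where the optimized metric is looked up below.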
    # Get metric score for hyperparameter optimization
    optimized_metric = config.get("optimized_metric")
    if optimized_metric and optimized_metric not in trainer.callback_metrics:
        raise Exception(
            "Metric for hyperparameter optimization not found! "
            "Make sure the `optimized_metric` in `hparams_search` config is correct!"
        )
    score = trainer.callback_metrics.get(optimized_metric)

    # Test the model
    if config.get("test"):
        ckpt_path = "best"
        if not config.get("train") or config.trainer.get("fast_dev_run"):
            ckpt_path = None
        log.info("Starting testing!")
        trainer.test(model=model, datamodule=datamodule, ckpt_path=ckpt_path)
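    # ckpt_path="best" restores the best checkpoint tracked by the checkpoint
    # callback; when training was skipped or fast_dev_run was used (which
    # disables checkpointing), ckpt_path=None tests the in-memory weights.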

    # Make sure everything closed properly
    log.info("Finalizing!")
    utils.finish(
        config=config,
        model=model,
        datamodule=datamodule,
        trainer=trainer,
        callbacks=callbacks,
        logger=logger,
    )

    # Print path to best checkpoint
    # (the train flag lives at the top level of the config, as in the checks
    # above, not under config.trainer)
    if not config.trainer.get("fast_dev_run") and config.get("train"):
        log.info(f"Best model ckpt at {trainer.checkpoint_callback.best_model_path}")

    # Return metric score for hyperparameter optimization
    return score
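

# A minimal usage sketch (assumption: the entry point typically lives in a
# separate run.py in this template style; the config path and name below are
# guesses, not taken from this repo):
#
#     @hydra.main(config_path="configs/", config_name="train.yaml")
#     def main(config: DictConfig) -> Optional[float]:
#         return train(config)
#
#     if __name__ == "__main__":
#         main()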