# mattaq's picture
# Added model checkpoints, training logs and onnx-exported checkpoint
# c282538
# Experiment identity and the root directory for this run's outputs.
# NOTE(review): how the consumer combines base_dir with experiment_name is
# not visible in this file — confirm.
experiment_name = "smp_unet++_july28"
base_dir = "/exports/csce/eddie/eng/groups/DunnGroup/matthew/models_gelgenie"
# Compute resources requested for the run.
[processing]
base_hardware = "EDDIE"
device = "GPU"
# NOTE(review): presumably the number of parallel-environment slots requested
# from the cluster scheduler — confirm.
pe = 1
# NOTE(review): unit is not stated here; presumably GB — confirm.
memory = 64
# Dataset locations and data-loader settings.
[data]
n_channels = 1
batch_size = 4
num_workers = 1
# NOTE(review): presumably a percentage, and presumably only used when
# split_training_dataset = true — confirm.
val_percent = 10
# Training masks and images come from three source folders each, listed in
# the same dataset order (lsdb_gels, nathan_gels, matthew_gels).
# NOTE(review): presumably the two lists are paired by position — confirm.
dir_train_mask = [
    "/exports/csce/eddie/eng/groups/DunnGroup/matthew/gel_data/lsdb_gels/masks",
    "/exports/csce/eddie/eng/groups/DunnGroup/matthew/gel_data/nathan_gels/masks",
    "/exports/csce/eddie/eng/groups/DunnGroup/matthew/gel_data/matthew_gels/masks",
]
dir_train_img = [
    "/exports/csce/eddie/eng/groups/DunnGroup/matthew/gel_data/lsdb_gels/images",
    "/exports/csce/eddie/eng/groups/DunnGroup/matthew/gel_data/nathan_gels/images",
    "/exports/csce/eddie/eng/groups/DunnGroup/matthew/gel_data/matthew_gels/images",
]
# Validation images and masks both live under the neb_ladders folder.
dir_val_img = "/exports/csce/eddie/eng/groups/DunnGroup/matthew/gel_data/neb_ladders/images"
dir_val_mask = "/exports/csce/eddie/eng/groups/DunnGroup/matthew/gel_data/neb_ladders/masks"
split_training_dataset = false
apply_augmentations = true
padding = true
# Network architecture.
[model]
# NOTE(review): the "smp_" prefix presumably selects a
# segmentation_models_pytorch UNet++ — confirm against the model factory.
model_name = "smp_unetplusplus"
classes = 2
# Same value as n_channels under [data]; presumably the two must agree —
# confirm.
in_channels = 1
encoder_name = "resnet18"
# Optimisation, checkpointing and experiment-tracking settings.
[training]
# NOTE(review): "both" presumably combines two loss terms; which ones is
# decided by the consumer and is not visible here — confirm.
loss = "both"
lr = 0.0001
epochs = 600
grad_scaler = true
load_checkpoint = false
optimizer_type = "adam"
# NOTE(review): presumably configured by the [training.scheduler_specs]
# table — confirm.
scheduler_type = "CosineAnnealingWarmRestarts"
save_checkpoint = true
# NOTE(review): unit not stated here; presumably epochs — confirm.
checkpoint_frequency = 1
wandb_track = true
# NOTE(review): unit and exact effect not stated here; presumably how often
# (in epochs) older checkpoints are pruned — confirm.
model_cleanup_frequency = 20
# NOTE(review): a fixed run id is set while load_checkpoint = false —
# confirm whether the tracker run is meant to be resumed or started fresh.
wandb_id = "3qembm5h"
# NOTE(review): presumably the parameters for the scheduler selected by
# training.scheduler_type — confirm.
[training.scheduler_specs]
# NOTE(review): unit not stated here; if it is epochs, training.epochs = 600
# gives six restart cycles — confirm.
restart_period = 100