# Sentry_image_models/clip_large_pretrain_4x256_sdv2_lr3e-5/clip_large_pretrain_4x256_sdv2_lr3e-5.py
# Optimizer wrapper: AdamW (lr=3e-05, weight_decay=0.3) wrapped by
# 'AmpOptimWrapper' with dtype 'bfloat16'.
optim_wrapper = dict(
    type='AmpOptimWrapper',
    dtype='bfloat16',
    optimizer=dict(
        type='AdamW', lr=3e-05, weight_decay=0.3, _scope_='mmpretrain'),
    paramwise_cfg=dict(
        # Per-key overrides: decay_mult=0.0 for the '.cls_token' and
        # '.pos_embed' keys.
        custom_keys={
            '.cls_token': dict(decay_mult=0.0),
            '.pos_embed': dict(decay_mult=0.0),
        }),
    # No gradient clipping is configured.
    clip_grad=None)
# Learning-rate schedule: a single cosine-annealing scheduler configured
# with by_epoch=False, starting at step 0 and with eta_min=1e-05.
param_scheduler = [
    dict(type='CosineAnnealingLR', eta_min=1e-05, by_epoch=False, begin=0),
]
# Training loop settings: epoch-based, 10 epochs, val_interval=1.
train_cfg = dict(by_epoch=True, max_epochs=10, val_interval=1)
# Validation and test loops carry no extra options.
val_cfg = dict()
test_cfg = dict()
# Reference batch size used for automatic learning-rate scaling.
auto_scale_lr = dict(base_batch_size=4096)
# Model definition: an 'ImageClassifier' made of a VisionTransformer
# backbone, a 'CLIPProjection' neck (1024 -> 768 channels) and a 2-class
# linear classification head.
model = dict(
    type='ImageClassifier',
    backbone=dict(
        type='VisionTransformer',
        arch='l',
        img_size=224,
        patch_size=14,
        drop_rate=0.1,
        pre_norm=True,
        final_norm=False,
        frozen_stages=24,
        # Backbone weights come from a local checkpoint file.
        init_cfg=dict(
            type='Pretrained',
            checkpoint='ckpt/openclip-ViT-L-14.pth',
            prefix='backbone')),
    neck=dict(
        type='CLIPProjection',
        in_channels=1024,
        out_channels=768,
        # NOTE(review): the neck uses the same checkpoint and the same
        # prefix ('backbone') as the backbone above -- confirm that the
        # projection weights are really stored under that prefix.
        init_cfg=dict(
            type='Pretrained',
            checkpoint='ckpt/openclip-ViT-L-14.pth',
            prefix='backbone')),
    head=dict(
        type='LinearClsHead',
        num_classes=2,
        in_channels=768,
        loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
        init_cfg=None),
    # Model-level initialisation entry for Conv2d and Linear layers.
    init_cfg=dict(
        type='TruncNormal', layer=['Conv2d', 'Linear'], std=0.02, bias=0.0),
    train_cfg=None)
# Dataset class name used by the dataloaders.
dataset_type = 'CustomDataset'
# Input normalisation: per-channel mean/std with to_rgb=True, for 2 classes.
data_preprocessor = dict(
    num_classes=2,
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    to_rgb=True)
# The same statistics listed in the reverse channel order.
bgr_mean = [103.53, 116.28, 123.675]
bgr_std = [57.375, 57.12, 58.395]
# Training-time transforms: load, random resized crop to 224 (bicubic,
# pillow backend), horizontal flip with probability 0.5, then pack.
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='RandomResizedCrop',
        scale=224,
        backend='pillow',
        interpolation='bicubic'),
    dict(type='RandomFlip', prob=0.5, direction='horizontal'),
    dict(type='PackInputs'),
]
# Evaluation-time transforms: load, resize the short edge to 256 (bicubic,
# pillow backend), centre-crop to 224, then pack. No random steps.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='ResizeEdge',
        scale=256,
        edge='short',
        backend='pillow',
        interpolation='bicubic'),
    dict(type='CenterCrop', crop_size=224),
    dict(type='PackInputs'),
]
# Training dataloader: concatenation of two 'CustomDataset' entries (one
# annotated by a .tsv file, one by a .csv file), shuffled, batch size 128.
# Both entries use the same random-crop / random-flip pipeline.
train_dataloader = dict(
    batch_size=128,
    num_workers=10,
    pin_memory=True,
    persistent_workers=True,
    collate_fn=dict(type='default_collate'),
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type='ConcatDataset',
        datasets=[
            dict(
                type='CustomDataset',
                data_root='/mnt/petrelfs/luzeyu/workspace/fakebench/dataset',
                ann_file=
                '/mnt/petrelfs/luzeyu/workspace/fakebench/dataset/meta/train/stablediffusionV2-1-dpmsolver-25-1m.tsv',
                pipeline=[
                    dict(type='LoadImageFromFile'),
                    dict(
                        type='RandomResizedCrop',
                        scale=224,
                        backend='pillow',
                        interpolation='bicubic'),
                    dict(type='RandomFlip', prob=0.5, direction='horizontal'),
                    dict(type='PackInputs'),
                ]),
            dict(
                type='CustomDataset',
                # Empty data_root; the annotation file is an absolute path.
                data_root='',
                ann_file=
                '/mnt/petrelfs/luzeyu/workspace/fakebench/dataset/meta/train/cc1m.csv',
                pipeline=[
                    dict(type='LoadImageFromFile'),
                    dict(
                        type='RandomResizedCrop',
                        scale=224,
                        backend='pillow',
                        interpolation='bicubic'),
                    dict(type='RandomFlip', prob=0.5, direction='horizontal'),
                    dict(type='PackInputs'),
                ]),
        ]))
# Validation dataloader: concatenation of two 'CustomDataset' entries (one
# annotated by a .tsv file, one by a .csv file), not shuffled, batch size 128.
#
# The per-dataset pipelines are the deterministic evaluation transforms
# (short-edge resize to 256 + centre crop to 224). Random cropping and
# flipping are training augmentations; applying them here would make the
# validation metrics vary from run to run.
val_dataloader = dict(
    batch_size=128,
    num_workers=10,
    pin_memory=True,
    persistent_workers=True,
    collate_fn=dict(type='default_collate'),
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type='ConcatDataset',
        datasets=[
            dict(
                type='CustomDataset',
                data_root='/mnt/petrelfs/luzeyu/workspace/fakebench/dataset',
                ann_file=
                '/mnt/petrelfs/luzeyu/workspace/fakebench/dataset/meta/val/stablediffusionV2-1-dpmsolver-25-1w.tsv',
                pipeline=[
                    dict(type='LoadImageFromFile'),
                    dict(
                        type='ResizeEdge',
                        scale=256,
                        edge='short',
                        backend='pillow',
                        interpolation='bicubic'),
                    dict(type='CenterCrop', crop_size=224),
                    dict(type='PackInputs'),
                ]),
            dict(
                type='CustomDataset',
                # Empty data_root; the annotation file is an absolute path.
                data_root='',
                ann_file=
                '/mnt/petrelfs/luzeyu/workspace/fakebench/dataset/meta/val/cc1w.csv',
                pipeline=[
                    dict(type='LoadImageFromFile'),
                    dict(
                        type='ResizeEdge',
                        scale=256,
                        edge='short',
                        backend='pillow',
                        interpolation='bicubic'),
                    dict(type='CenterCrop', crop_size=224),
                    dict(type='PackInputs'),
                ]),
        ]))
# Validation metrics: an 'Accuracy' entry with topk=1 and a
# 'SingleLabelMetric' entry with average=None.
val_evaluator = [
    dict(type='Accuracy', topk=1),
    dict(type='SingleLabelMetric', average=None),
]
# Test dataloader: concatenation of two 'CustomDataset' entries (one
# annotated by a .tsv file, one by a .csv file), not shuffled, batch size 128.
#
# The per-dataset pipelines are the deterministic evaluation transforms
# (short-edge resize to 256 + centre crop to 224). Random cropping and
# flipping are training augmentations; applying them here would make the
# test metrics vary from run to run.
test_dataloader = dict(
    batch_size=128,
    num_workers=10,
    pin_memory=True,
    persistent_workers=True,
    collate_fn=dict(type='default_collate'),
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type='ConcatDataset',
        datasets=[
            dict(
                type='CustomDataset',
                data_root='/mnt/petrelfs/luzeyu/workspace/fakebench/dataset',
                ann_file=
                '/mnt/petrelfs/luzeyu/workspace/fakebench/dataset/meta/val/stablediffusionV2-1-dpmsolver-25-1w.tsv',
                pipeline=[
                    dict(type='LoadImageFromFile'),
                    dict(
                        type='ResizeEdge',
                        scale=256,
                        edge='short',
                        backend='pillow',
                        interpolation='bicubic'),
                    dict(type='CenterCrop', crop_size=224),
                    dict(type='PackInputs'),
                ]),
            dict(
                type='CustomDataset',
                # Empty data_root; the annotation file is an absolute path.
                data_root='',
                ann_file=
                '/mnt/petrelfs/luzeyu/workspace/fakebench/dataset/meta/val/cc1w.csv',
                pipeline=[
                    dict(type='LoadImageFromFile'),
                    dict(
                        type='ResizeEdge',
                        scale=256,
                        edge='short',
                        backend='pillow',
                        interpolation='bicubic'),
                    dict(type='CenterCrop', crop_size=224),
                    dict(type='PackInputs'),
                ]),
        ]))
# Test metrics: an 'Accuracy' entry with topk=1 and a 'SingleLabelMetric'
# entry with average=None.
test_evaluator = [
    dict(type='Accuracy', topk=1),
    dict(type='SingleLabelMetric', average=None),
]
# Extra hook: an 'EMAHook' with momentum 0.0001 at 'ABOVE_NORMAL' priority.
custom_hooks = [
    dict(type='EMAHook', momentum=0.0001, priority='ABOVE_NORMAL'),
]
# Default registry scope for the type names used in this config.
default_scope = 'mmpretrain'
# Standard runtime hooks; logger interval is 100, checkpoint interval is 1.
default_hooks = dict(
    timer=dict(type='IterTimerHook'),
    logger=dict(type='LoggerHook', interval=100),
    param_scheduler=dict(type='ParamSchedulerHook'),
    checkpoint=dict(type='CheckpointHook', interval=1),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    visualization=dict(type='VisualizationHook', enable=True))
# Environment settings: cuDNN benchmark flag, multiprocessing options
# ('fork' start method, opencv_num_threads=0) and the 'nccl' distributed
# backend.
env_cfg = dict(
    cudnn_benchmark=True,
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
    dist_cfg=dict(backend='nccl'))
# Standalone backend list.
# NOTE(review): this list holds only 'LocalVisBackend', while the
# visualizer below also lists 'TensorboardVisBackend' -- confirm whether
# this top-level variable is still consumed anywhere.
vis_backends = [dict(type='LocalVisBackend')]
# Visualizer with a local backend and a TensorBoard backend.
visualizer = dict(
    type='UniversalVisualizer',
    vis_backends=[
        dict(type='LocalVisBackend'),
        dict(type='TensorboardVisBackend'),
    ])
# Logging verbosity.
log_level = 'INFO'
# No checkpoint to load and no resume requested.
load_from = None
resume = False
# No fixed random seed; deterministic mode is off.
randomness = dict(seed=None, deterministic=False)
# Job launcher name and the output directory for this run.
launcher = 'slurm'
work_dir = 'workdir/clip_large_pretrain_4x256_sdv2_lr3e-5'