zgcr654321
/

classification_training

Model card Files Files and versions Community

zgcr654321 committed on Dec 7, 2023

Commit

a06b5ad

•

1 Parent(s): c1ca0dc

Upload 28 files

Browse files

Files changed (28) hide show

imagenet/van_b0/__pycache__/train_config.cpython-38.pyc +0 -0
imagenet/van_b0/checkpoints/latest.pth +3 -0
imagenet/van_b0/checkpoints/van_b0-acc75.618.pth +3 -0
imagenet/van_b0/log/train.info.log +0 -0
imagenet/van_b0/log/train.info.log.2023-11-21 +0 -0
imagenet/van_b0/log/train.info.log.2023-11-28 +0 -0
imagenet/van_b0/test.sh +1 -0
imagenet/van_b0/test_config.py +55 -0
imagenet/van_b0/train.sh +1 -0
imagenet/van_b0/train_config.py +126 -0
imagenet/van_b1/__pycache__/train_config.cpython-38.pyc +0 -0
imagenet/van_b1/checkpoints/latest.pth +3 -0
imagenet/van_b1/checkpoints/van_b1-acc80.956.pth +3 -0
imagenet/van_b1/log/train.info.log +0 -0
imagenet/van_b1/log/train.info.log.2023-11-28 +0 -0
imagenet/van_b1/test.sh +1 -0
imagenet/van_b1/test_config.py +55 -0
imagenet/van_b1/train.sh +1 -0
imagenet/van_b1/train_config.py +126 -0
imagenet/van_b2/__pycache__/train_config.cpython-38.pyc +0 -0
imagenet/van_b2/checkpoints/latest.pth +3 -0
imagenet/van_b2/checkpoints/van_b2-acc82.322.pth +3 -0
imagenet/van_b2/log/train.info.log +0 -0
imagenet/van_b2/log/train.info.log.2023-11-21 +0 -0
imagenet/van_b2/test.sh +1 -0
imagenet/van_b2/test_config.py +55 -0
imagenet/van_b2/train.sh +1 -0
imagenet/van_b2/train_config.py +126 -0

imagenet/van_b0/__pycache__/train_config.cpython-38.pyc ADDED Viewed

Binary file (3.15 kB). View file

imagenet/van_b0/checkpoints/latest.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5e3973460b358593771c4a8c864bb3033f55034f96d4443dd26b71c653ab986c
+size 49711051

imagenet/van_b0/checkpoints/van_b0-acc75.618.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:162b506f60e4a1f1251c9e7de97bd9324ccf758474727277531cd27ee6bce45b
+size 16575333

imagenet/van_b0/log/train.info.log ADDED Viewed

The diff for this file is too large to render. See raw diff

imagenet/van_b0/log/train.info.log.2023-11-21 ADDED Viewed

The diff for this file is too large to render. See raw diff

imagenet/van_b0/log/train.info.log.2023-11-28 ADDED Viewed

The diff for this file is too large to render. See raw diff

imagenet/van_b0/test.sh ADDED Viewed

	@@ -0,0 +1 @@


1	+ CUDA_VISIBLE_DEVICES=0,1 python -m torch.distributed.run --nproc_per_node=2 --master_addr 127.0.1.0 --master_port 10000 ../../../tools/test_classification_model.py --work-dir ./

imagenet/van_b0/test_config.py ADDED Viewed

	@@ -0,0 +1,55 @@

+import os
+import sys
+BASE_DIR = os.path.dirname(
+    os.path.dirname(os.path.dirname(os.path.dirname(
+        os.path.abspath(__file__)))))
+sys.path.append(BASE_DIR)
+from tools.path import ILSVRC2012_path
+from simpleAICV.classification import backbones
+from simpleAICV.classification import losses
+from simpleAICV.classification.datasets.ilsvrc2012dataset import ILSVRC2012Dataset
+from simpleAICV.classification.common import Opencv2PIL, TorchResize, TorchCenterCrop, TorchMeanStdNormalize, ClassificationCollater, load_state_dict
+import torch
+import torchvision.transforms as transforms
+class config:  # Evaluation settings for the 'van_b0' classifier on the ILSVRC2012 'val' split; every attribute is built when this class body executes.
+    '''
+    for resnet,input_image_size = 224;for darknet,input_image_size = 256
+    '''  # NOTE(review): the text mentions resnet/darknet although network is 'van_b0' - looks carried over from another config
+    network = 'van_b0'  # key used below to look up the model constructor in backbones.__dict__
+    num_classes = 1000
+    input_image_size = 224  # passed as resize= to TorchCenterCrop below
+    scale = 256 / 224  # input_image_size * scale (about 256) is the TorchResize target
+    model = backbones.__dict__[network](**{
+        'num_classes': num_classes,
+    })
+    # checkpoint path handed to load_state_dict together with the model
+    trained_model_path = ''  # NOTE(review): empty here; presumably load_state_dict then loads nothing - confirm in simpleAICV.classification.common
+    load_state_dict(trained_model_path, model)
+    test_criterion = losses.__dict__['CELoss']()  # loss class is looked up by name in the losses module
+    test_dataset = ILSVRC2012Dataset(
+        root_dir=ILSVRC2012_path,
+        set_name='val',
+        transform=transforms.Compose([
+            Opencv2PIL(),
+            TorchResize(resize=input_image_size * scale),
+            TorchCenterCrop(resize=input_image_size),
+            TorchMeanStdNormalize(mean=[0.485, 0.456, 0.406],
+                                  std=[0.229, 0.224, 0.225]),
+        ]))
+    test_collater = ClassificationCollater()
+    seed = 0
+    # batch_size is the total batch size
+    batch_size = 256  # NOTE(review): presumably split across the launched processes - confirm in the test script
+    # num_workers is the total number of workers
+    num_workers = 16

imagenet/van_b0/train.sh ADDED Viewed

	@@ -0,0 +1 @@


1	+ CUDA_VISIBLE_DEVICES=0,1 python -m torch.distributed.run --nproc_per_node=2 --master_addr 127.0.1.0 --master_port 10000 ../../../tools/train_classification_model.py --work-dir ./

imagenet/van_b0/train_config.py ADDED Viewed

	@@ -0,0 +1,126 @@

+import os
+import sys
+BASE_DIR = os.path.dirname(
+    os.path.dirname(os.path.dirname(os.path.dirname(
+        os.path.abspath(__file__)))))
+sys.path.append(BASE_DIR)
+from tools.path import ILSVRC2012_path
+from simpleAICV.classification import backbones
+from simpleAICV.classification import losses
+from simpleAICV.classification.datasets.ilsvrc2012dataset import ILSVRC2012Dataset
+from simpleAICV.classification.common import Opencv2PIL, TorchRandomResizedCrop, TorchRandomHorizontalFlip, RandAugment, TorchResize, TorchCenterCrop, TorchMeanStdNormalize, RandomErasing, ClassificationCollater, MixupCutmixClassificationCollater, load_state_dict
+import torch
+import torchvision.transforms as transforms
+class config:  # Training settings for the 'van_b0' classifier on ILSVRC2012; every attribute is built when this class body executes.
+    '''
+    for resnet,input_image_size = 224;for darknet,input_image_size = 256
+    '''  # NOTE(review): the text mentions resnet/darknet although network is 'van_b0' - looks carried over from another config
+    network = 'van_b0'  # key used below to look up the model constructor in backbones.__dict__
+    num_classes = 1000
+    input_image_size = 224  # passed as resize= to the crop transforms and RandAugment below
+    scale = 256 / 224  # input_image_size * scale (about 256) is the TorchResize target of the 'val' pipeline
+    model = backbones.__dict__[network](**{
+        'drop_path_prob': 0.1,
+        'num_classes': num_classes,
+    })
+    # checkpoint path handed to load_state_dict together with the model
+    trained_model_path = '/root/code/SimpleAICV_pytorch_training_examples_on_ImageNet_COCO_ADE20K/pretrained_models/van_weight_convert_from_official_weights/van_b0_pytorch_official_weight_convert.pth'  # NOTE(review): absolute, machine-specific path; must exist on the training host
+    load_state_dict(trained_model_path, model)
+    train_criterion = losses.__dict__['OneHotLabelCELoss']()  # NOTE(review): presumably consumes the labels produced by train_collater below - confirm
+    test_criterion = losses.__dict__['CELoss']()
+    train_dataset = ILSVRC2012Dataset(
+        root_dir=ILSVRC2012_path,
+        set_name='train',
+        transform=transforms.Compose([
+            Opencv2PIL(),
+            TorchRandomResizedCrop(resize=input_image_size),
+            TorchRandomHorizontalFlip(prob=0.5),
+            RandAugment(magnitude=9,
+                        num_layers=2,
+                        resize=input_image_size,
+                        mean=[0.485, 0.456, 0.406],
+                        integer=True,
+                        weight_idx=None,
+                        magnitude_std=0.5,
+                        magnitude_max=None),
+            TorchMeanStdNormalize(mean=[0.485, 0.456, 0.406],
+                                  std=[0.229, 0.224, 0.225]),
+            RandomErasing(prob=0.25, mode='pixel', max_count=1),
+        ]))
+    test_dataset = ILSVRC2012Dataset(
+        root_dir=ILSVRC2012_path,
+        set_name='val',
+        transform=transforms.Compose([
+            Opencv2PIL(),
+            TorchResize(resize=input_image_size * scale),
+            TorchCenterCrop(resize=input_image_size),
+            TorchMeanStdNormalize(mean=[0.485, 0.456, 0.406],
+                                  std=[0.229, 0.224, 0.225]),
+        ]))
+    train_collater = MixupCutmixClassificationCollater(
+        use_mixup=True,
+        mixup_alpha=0.8,
+        cutmix_alpha=1.0,
+        cutmix_minmax=None,
+        mixup_cutmix_prob=1.0,
+        switch_to_cutmix_prob=0.5,
+        mode='batch',
+        correct_lam=True,
+        label_smoothing=0.1,
+        num_classes=1000)  # NOTE(review): literal duplicates num_classes defined above; keep the two in sync
+    test_collater = ClassificationCollater()
+    seed = 0
+    # batch_size is the total batch size
+    batch_size = 256  # NOTE(review): presumably split across the launched processes - confirm in the training script
+    # num_workers is the total number of workers
+    num_workers = 30
+    accumulation_steps = 4  # NOTE(review): presumably gradient-accumulation steps; how it combines with batch_size is decided by the training script - confirm
+    optimizer = (  # (optimizer name, keyword settings) pair
+        'AdamW',
+        {
+            'lr': 1e-4,
+            'global_weight_decay': False,
+            # when global_weight_decay is False,
+            # all bias, bn and other 1-d parameters use a weight decay of 0
+            'weight_decay': 1e-4,
+            'no_weight_decay_layer_name_list': [],
+        },
+    )
+    scheduler = (  # (scheduler name, keyword settings) pair
+        'CosineLR',
+        {
+            'warm_up_epochs': 5,
+            'min_lr': 1e-6,
+        },
+    )
+    epochs = 300
+    print_interval = 50
+    sync_bn = False
+    use_amp = False
+    use_compile = False  # NOTE(review): compile_params below presumably only matters when this is True - confirm in the training script
+    compile_params = {
+        # 'default': optimizes for large models, with low compile time and no extra memory usage.
+        # 'reduce-overhead': reduces framework overhead at the cost of some extra memory; helps speed up small models, but model updates may be incorrect.
+        # 'max-autotune': produces the fastest model, but takes a very long time to compile and may fail.
+        'mode': 'default',
+    }
+    use_ema_model = False
+    ema_model_decay = 0.9999

imagenet/van_b1/__pycache__/train_config.cpython-38.pyc ADDED Viewed

Binary file (3.15 kB). View file

imagenet/van_b1/checkpoints/latest.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:39b1c45b4bc52676f3e15de8dafb4efb7ccb837e937024ead4d2aecfc7558a54
+size 166723007

imagenet/van_b1/checkpoints/van_b1-acc80.956.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a7d54257e90cba48e94e2abcc8b39a24d26096eac4d18122e758ba4d0bc4e3f3
+size 55600905

imagenet/van_b1/log/train.info.log ADDED Viewed

The diff for this file is too large to render. See raw diff

imagenet/van_b1/log/train.info.log.2023-11-28 ADDED Viewed

The diff for this file is too large to render. See raw diff

imagenet/van_b1/test.sh ADDED Viewed

	@@ -0,0 +1 @@


1	+ CUDA_VISIBLE_DEVICES=0,1 python -m torch.distributed.run --nproc_per_node=2 --master_addr 127.0.1.0 --master_port 10000 ../../../tools/test_classification_model.py --work-dir ./

imagenet/van_b1/test_config.py ADDED Viewed

	@@ -0,0 +1,55 @@

+import os
+import sys
+BASE_DIR = os.path.dirname(
+    os.path.dirname(os.path.dirname(os.path.dirname(
+        os.path.abspath(__file__)))))
+sys.path.append(BASE_DIR)
+from tools.path import ILSVRC2012_path
+from simpleAICV.classification import backbones
+from simpleAICV.classification import losses
+from simpleAICV.classification.datasets.ilsvrc2012dataset import ILSVRC2012Dataset
+from simpleAICV.classification.common import Opencv2PIL, TorchResize, TorchCenterCrop, TorchMeanStdNormalize, ClassificationCollater, load_state_dict
+import torch
+import torchvision.transforms as transforms
+class config:  # Evaluation settings for the 'van_b1' classifier on the ILSVRC2012 'val' split; every attribute is built when this class body executes.
+    '''
+    for resnet,input_image_size = 224;for darknet,input_image_size = 256
+    '''  # NOTE(review): the text mentions resnet/darknet although network is 'van_b1' - looks carried over from another config
+    network = 'van_b1'  # key used below to look up the model constructor in backbones.__dict__
+    num_classes = 1000
+    input_image_size = 224  # passed as resize= to TorchCenterCrop below
+    scale = 256 / 224  # input_image_size * scale (about 256) is the TorchResize target
+    model = backbones.__dict__[network](**{
+        'num_classes': num_classes,
+    })
+    # checkpoint path handed to load_state_dict together with the model
+    trained_model_path = ''  # NOTE(review): empty here; presumably load_state_dict then loads nothing - confirm in simpleAICV.classification.common
+    load_state_dict(trained_model_path, model)
+    test_criterion = losses.__dict__['CELoss']()  # loss class is looked up by name in the losses module
+    test_dataset = ILSVRC2012Dataset(
+        root_dir=ILSVRC2012_path,
+        set_name='val',
+        transform=transforms.Compose([
+            Opencv2PIL(),
+            TorchResize(resize=input_image_size * scale),
+            TorchCenterCrop(resize=input_image_size),
+            TorchMeanStdNormalize(mean=[0.485, 0.456, 0.406],
+                                  std=[0.229, 0.224, 0.225]),
+        ]))
+    test_collater = ClassificationCollater()
+    seed = 0
+    # batch_size is the total batch size
+    batch_size = 256  # NOTE(review): presumably split across the launched processes - confirm in the test script
+    # num_workers is the total number of workers
+    num_workers = 16

imagenet/van_b1/train.sh ADDED Viewed

	@@ -0,0 +1 @@


1	+ CUDA_VISIBLE_DEVICES=0,1 python -m torch.distributed.run --nproc_per_node=2 --master_addr 127.0.1.0 --master_port 10000 ../../../tools/train_classification_model.py --work-dir ./

imagenet/van_b1/train_config.py ADDED Viewed

	@@ -0,0 +1,126 @@

+import os
+import sys
+BASE_DIR = os.path.dirname(
+    os.path.dirname(os.path.dirname(os.path.dirname(
+        os.path.abspath(__file__)))))
+sys.path.append(BASE_DIR)
+from tools.path import ILSVRC2012_path
+from simpleAICV.classification import backbones
+from simpleAICV.classification import losses
+from simpleAICV.classification.datasets.ilsvrc2012dataset import ILSVRC2012Dataset
+from simpleAICV.classification.common import Opencv2PIL, TorchRandomResizedCrop, TorchRandomHorizontalFlip, RandAugment, TorchResize, TorchCenterCrop, TorchMeanStdNormalize, RandomErasing, ClassificationCollater, MixupCutmixClassificationCollater, load_state_dict
+import torch
+import torchvision.transforms as transforms
+class config:  # Training settings for the 'van_b1' classifier on ILSVRC2012; every attribute is built when this class body executes.
+    '''
+    for resnet,input_image_size = 224;for darknet,input_image_size = 256
+    '''  # NOTE(review): the text mentions resnet/darknet although network is 'van_b1' - looks carried over from another config
+    network = 'van_b1'  # key used below to look up the model constructor in backbones.__dict__
+    num_classes = 1000
+    input_image_size = 224  # passed as resize= to the crop transforms and RandAugment below
+    scale = 256 / 224  # input_image_size * scale (about 256) is the TorchResize target of the 'val' pipeline
+    model = backbones.__dict__[network](**{
+        'drop_path_prob': 0.1,
+        'num_classes': num_classes,
+    })
+    # checkpoint path handed to load_state_dict together with the model
+    trained_model_path = '/root/code/SimpleAICV_pytorch_training_examples_on_ImageNet_COCO_ADE20K/pretrained_models/van_weight_convert_from_official_weights/van_b1_pytorch_official_weight_convert.pth'  # NOTE(review): absolute, machine-specific path; must exist on the training host
+    load_state_dict(trained_model_path, model)
+    train_criterion = losses.__dict__['OneHotLabelCELoss']()  # NOTE(review): presumably consumes the labels produced by train_collater below - confirm
+    test_criterion = losses.__dict__['CELoss']()
+    train_dataset = ILSVRC2012Dataset(
+        root_dir=ILSVRC2012_path,
+        set_name='train',
+        transform=transforms.Compose([
+            Opencv2PIL(),
+            TorchRandomResizedCrop(resize=input_image_size),
+            TorchRandomHorizontalFlip(prob=0.5),
+            RandAugment(magnitude=9,
+                        num_layers=2,
+                        resize=input_image_size,
+                        mean=[0.485, 0.456, 0.406],
+                        integer=True,
+                        weight_idx=None,
+                        magnitude_std=0.5,
+                        magnitude_max=None),
+            TorchMeanStdNormalize(mean=[0.485, 0.456, 0.406],
+                                  std=[0.229, 0.224, 0.225]),
+            RandomErasing(prob=0.25, mode='pixel', max_count=1),
+        ]))
+    test_dataset = ILSVRC2012Dataset(
+        root_dir=ILSVRC2012_path,
+        set_name='val',
+        transform=transforms.Compose([
+            Opencv2PIL(),
+            TorchResize(resize=input_image_size * scale),
+            TorchCenterCrop(resize=input_image_size),
+            TorchMeanStdNormalize(mean=[0.485, 0.456, 0.406],
+                                  std=[0.229, 0.224, 0.225]),
+        ]))
+    train_collater = MixupCutmixClassificationCollater(
+        use_mixup=True,
+        mixup_alpha=0.8,
+        cutmix_alpha=1.0,
+        cutmix_minmax=None,
+        mixup_cutmix_prob=1.0,
+        switch_to_cutmix_prob=0.5,
+        mode='batch',
+        correct_lam=True,
+        label_smoothing=0.1,
+        num_classes=1000)  # NOTE(review): literal duplicates num_classes defined above; keep the two in sync
+    test_collater = ClassificationCollater()
+    seed = 0
+    # batch_size is the total batch size
+    batch_size = 256  # NOTE(review): presumably split across the launched processes - confirm in the training script
+    # num_workers is the total number of workers
+    num_workers = 30
+    accumulation_steps = 4  # NOTE(review): presumably gradient-accumulation steps; how it combines with batch_size is decided by the training script - confirm
+    optimizer = (  # (optimizer name, keyword settings) pair
+        'AdamW',
+        {
+            'lr': 1e-4,
+            'global_weight_decay': False,
+            # when global_weight_decay is False,
+            # all bias, bn and other 1-d parameters use a weight decay of 0
+            'weight_decay': 1e-4,
+            'no_weight_decay_layer_name_list': [],
+        },
+    )
+    scheduler = (  # (scheduler name, keyword settings) pair
+        'CosineLR',
+        {
+            'warm_up_epochs': 5,
+            'min_lr': 1e-6,
+        },
+    )
+    epochs = 300
+    print_interval = 50
+    sync_bn = False
+    use_amp = False
+    use_compile = False  # NOTE(review): compile_params below presumably only matters when this is True - confirm in the training script
+    compile_params = {
+        # 'default': optimizes for large models, with low compile time and no extra memory usage.
+        # 'reduce-overhead': reduces framework overhead at the cost of some extra memory; helps speed up small models, but model updates may be incorrect.
+        # 'max-autotune': produces the fastest model, but takes a very long time to compile and may fail.
+        'mode': 'default',
+    }
+    use_ema_model = False
+    ema_model_decay = 0.9999

imagenet/van_b2/__pycache__/train_config.cpython-38.pyc ADDED Viewed

Binary file (3.15 kB). View file

imagenet/van_b2/checkpoints/latest.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f7bb5fa0d4292847ea6e686273c143b2d8efb4b9a79ff97c63dbfc837579c38f
+size 319694059

imagenet/van_b2/checkpoints/van_b2-acc82.322.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7d62b46a66177819c875a50822bbda4190cac5c989cecde2cd1963b68e6c6f7d
+size 106609413

imagenet/van_b2/log/train.info.log ADDED Viewed

The diff for this file is too large to render. See raw diff

imagenet/van_b2/log/train.info.log.2023-11-21 ADDED Viewed

The diff for this file is too large to render. See raw diff

imagenet/van_b2/test.sh ADDED Viewed

	@@ -0,0 +1 @@


1	+ CUDA_VISIBLE_DEVICES=0,1 python -m torch.distributed.run --nproc_per_node=2 --master_addr 127.0.1.0 --master_port 10000 ../../../tools/test_classification_model.py --work-dir ./

imagenet/van_b2/test_config.py ADDED Viewed

	@@ -0,0 +1,55 @@

+import os
+import sys
+BASE_DIR = os.path.dirname(
+    os.path.dirname(os.path.dirname(os.path.dirname(
+        os.path.abspath(__file__)))))
+sys.path.append(BASE_DIR)
+from tools.path import ILSVRC2012_path
+from simpleAICV.classification import backbones
+from simpleAICV.classification import losses
+from simpleAICV.classification.datasets.ilsvrc2012dataset import ILSVRC2012Dataset
+from simpleAICV.classification.common import Opencv2PIL, TorchResize, TorchCenterCrop, TorchMeanStdNormalize, ClassificationCollater, load_state_dict
+import torch
+import torchvision.transforms as transforms
+class config:  # Evaluation settings for the 'van_b2' classifier on the ILSVRC2012 'val' split; every attribute is built when this class body executes.
+    '''
+    for resnet,input_image_size = 224;for darknet,input_image_size = 256
+    '''  # NOTE(review): the text mentions resnet/darknet although network is 'van_b2' - looks carried over from another config
+    network = 'van_b2'  # key used below to look up the model constructor in backbones.__dict__
+    num_classes = 1000
+    input_image_size = 224  # passed as resize= to TorchCenterCrop below
+    scale = 256 / 224  # input_image_size * scale (about 256) is the TorchResize target
+    model = backbones.__dict__[network](**{
+        'num_classes': num_classes,
+    })
+    # checkpoint path handed to load_state_dict together with the model
+    trained_model_path = ''  # NOTE(review): empty here; presumably load_state_dict then loads nothing - confirm in simpleAICV.classification.common
+    load_state_dict(trained_model_path, model)
+    test_criterion = losses.__dict__['CELoss']()  # loss class is looked up by name in the losses module
+    test_dataset = ILSVRC2012Dataset(
+        root_dir=ILSVRC2012_path,
+        set_name='val',
+        transform=transforms.Compose([
+            Opencv2PIL(),
+            TorchResize(resize=input_image_size * scale),
+            TorchCenterCrop(resize=input_image_size),
+            TorchMeanStdNormalize(mean=[0.485, 0.456, 0.406],
+                                  std=[0.229, 0.224, 0.225]),
+        ]))
+    test_collater = ClassificationCollater()
+    seed = 0
+    # batch_size is the total batch size
+    batch_size = 256  # NOTE(review): presumably split across the launched processes - confirm in the test script
+    # num_workers is the total number of workers
+    num_workers = 16

imagenet/van_b2/train.sh ADDED Viewed

	@@ -0,0 +1 @@


1	+ CUDA_VISIBLE_DEVICES=0,1,2,3 python -m torch.distributed.run --nproc_per_node=4 --master_addr 127.0.1.0 --master_port 10000 ../../../tools/train_classification_model.py --work-dir ./

imagenet/van_b2/train_config.py ADDED Viewed

	@@ -0,0 +1,126 @@

+import os
+import sys
+BASE_DIR = os.path.dirname(
+    os.path.dirname(os.path.dirname(os.path.dirname(
+        os.path.abspath(__file__)))))
+sys.path.append(BASE_DIR)
+from tools.path import ILSVRC2012_path
+from simpleAICV.classification import backbones
+from simpleAICV.classification import losses
+from simpleAICV.classification.datasets.ilsvrc2012dataset import ILSVRC2012Dataset
+from simpleAICV.classification.common import Opencv2PIL, TorchRandomResizedCrop, TorchRandomHorizontalFlip, RandAugment, TorchResize, TorchCenterCrop, TorchMeanStdNormalize, RandomErasing, ClassificationCollater, MixupCutmixClassificationCollater, load_state_dict
+import torch
+import torchvision.transforms as transforms
+class config:  # Training settings for the 'van_b2' classifier on ILSVRC2012; every attribute is built when this class body executes.
+    '''
+    for resnet,input_image_size = 224;for darknet,input_image_size = 256
+    '''  # NOTE(review): the text mentions resnet/darknet although network is 'van_b2' - looks carried over from another config
+    network = 'van_b2'  # key used below to look up the model constructor in backbones.__dict__
+    num_classes = 1000
+    input_image_size = 224  # passed as resize= to the crop transforms and RandAugment below
+    scale = 256 / 224  # input_image_size * scale (about 256) is the TorchResize target of the 'val' pipeline
+    model = backbones.__dict__[network](**{
+        'drop_path_prob': 0.1,
+        'num_classes': num_classes,
+    })
+    # checkpoint path handed to load_state_dict together with the model
+    trained_model_path = '/root/code/SimpleAICV_pytorch_training_examples_on_ImageNet_COCO_ADE20K/pretrained_models/van_weight_convert_from_official_weights/van_b2_pytorch_official_weight_convert.pth'  # NOTE(review): absolute, machine-specific path; must exist on the training host
+    load_state_dict(trained_model_path, model)
+    train_criterion = losses.__dict__['OneHotLabelCELoss']()  # NOTE(review): presumably consumes the labels produced by train_collater below - confirm
+    test_criterion = losses.__dict__['CELoss']()
+    train_dataset = ILSVRC2012Dataset(
+        root_dir=ILSVRC2012_path,
+        set_name='train',
+        transform=transforms.Compose([
+            Opencv2PIL(),
+            TorchRandomResizedCrop(resize=input_image_size),
+            TorchRandomHorizontalFlip(prob=0.5),
+            RandAugment(magnitude=9,
+                        num_layers=2,
+                        resize=input_image_size,
+                        mean=[0.485, 0.456, 0.406],
+                        integer=True,
+                        weight_idx=None,
+                        magnitude_std=0.5,
+                        magnitude_max=None),
+            TorchMeanStdNormalize(mean=[0.485, 0.456, 0.406],
+                                  std=[0.229, 0.224, 0.225]),
+            RandomErasing(prob=0.25, mode='pixel', max_count=1),
+        ]))
+    test_dataset = ILSVRC2012Dataset(
+        root_dir=ILSVRC2012_path,
+        set_name='val',
+        transform=transforms.Compose([
+            Opencv2PIL(),
+            TorchResize(resize=input_image_size * scale),
+            TorchCenterCrop(resize=input_image_size),
+            TorchMeanStdNormalize(mean=[0.485, 0.456, 0.406],
+                                  std=[0.229, 0.224, 0.225]),
+        ]))
+    train_collater = MixupCutmixClassificationCollater(
+        use_mixup=True,
+        mixup_alpha=0.8,
+        cutmix_alpha=1.0,
+        cutmix_minmax=None,
+        mixup_cutmix_prob=1.0,
+        switch_to_cutmix_prob=0.5,
+        mode='batch',
+        correct_lam=True,
+        label_smoothing=0.1,
+        num_classes=1000)  # NOTE(review): literal duplicates num_classes defined above; keep the two in sync
+    test_collater = ClassificationCollater()
+    seed = 0
+    # batch_size is the total batch size
+    batch_size = 256  # NOTE(review): presumably split across the launched processes - confirm in the training script
+    # num_workers is the total number of workers
+    num_workers = 60
+    accumulation_steps = 4  # NOTE(review): presumably gradient-accumulation steps; how it combines with batch_size is decided by the training script - confirm
+    optimizer = (  # (optimizer name, keyword settings) pair
+        'AdamW',
+        {
+            'lr': 1e-4,
+            'global_weight_decay': False,
+            # when global_weight_decay is False,
+            # all bias, bn and other 1-d parameters use a weight decay of 0
+            'weight_decay': 1e-4,
+            'no_weight_decay_layer_name_list': [],
+        },
+    )
+    scheduler = (  # (scheduler name, keyword settings) pair
+        'CosineLR',
+        {
+            'warm_up_epochs': 5,
+            'min_lr': 1e-6,
+        },
+    )
+    epochs = 300
+    print_interval = 50
+    sync_bn = False
+    use_amp = False
+    use_compile = False  # NOTE(review): compile_params below presumably only matters when this is True - confirm in the training script
+    compile_params = {
+        # 'default': optimizes for large models, with low compile time and no extra memory usage.
+        # 'reduce-overhead': reduces framework overhead at the cost of some extra memory; helps speed up small models, but model updates may be incorrect.
+        # 'max-autotune': produces the fastest model, but takes a very long time to compile and may fail.
+        'mode': 'default',
+    }
+    use_ema_model = False
+    ema_model_decay = 0.9999