# Training / evaluation configuration: VisionTransformer model on the Ssv2 dataset.
#
# NOTE(review): the nesting below was reconstructed from a copy of this file in
# which all line breaks and indentation had been lost. Section membership
# follows the original key order; NUM_GPUS, NUM_SHARDS, RNG_SEED and OUTPUT_DIR
# are assumed to be top-level keys, and USE_MIXED_PRECISION / SMOOTHING are
# assumed to belong to SOLVER -- confirm against the consuming config schema.

TRAIN:
  ENABLE: True
  DATASET: Ssv2
  BATCH_SIZE: 32
  EVAL_PERIOD: 5
  CHECKPOINT_PERIOD: 5
  AUTO_RESUME: True
  CHECKPOINT_EPOCH_RESET: True
  # NOTE(review): absolute, user-specific path; presumably only resolvable on
  # the machine/cluster where this run was launched.
  CHECKPOINT_FILE_PATH: /checkpoint/fmetze/neurips_sota/40944587/checkpoints/checkpoint_epoch_00035.pyth

DATA:
  NUM_FRAMES: 16
  SAMPLING_RATE: 4
  TRAIN_JITTER_SCALES: [256, 320]
  TRAIN_CROP_SIZE: 224
  TEST_CROP_SIZE: 224
  INPUT_CHANNEL_NUM: [3]
  MEAN: [0.5, 0.5, 0.5]
  STD: [0.5, 0.5, 0.5]
  # NOTE(review): both paths below are absolute and environment-specific.
  PATH_TO_DATA_DIR: /private/home/mandelapatrick/slowfast/data/ssv2
  PATH_PREFIX: /datasets01/SomethingV2/092720/20bn-something-something-v2-frames
  INV_UNIFORM_SAMPLE: True
  RANDOM_FLIP: False
  REVERSE_INPUT_CHANNEL: True
  USE_RAND_AUGMENT: True
  RE_PROB: 0.0
  USE_REPEATED_AUG: False
  USE_RANDOM_RESIZE_CROPS: False
  COLORJITTER: False
  GRAYSCALE: False
  GAUSSIAN: False

SOLVER:
  # Exponent floats are written with an explicit mantissa dot ("1.0e-4", not
  # "1e-4") so that YAML 1.1 loaders such as PyYAML resolve them as floats
  # instead of strings. The numeric values are unchanged.
  BASE_LR: 1.0e-4
  LR_POLICY: steps_with_relative_lrs
  # Presumably LRS[i] is the multiplier applied to BASE_LR from epoch STEPS[i]
  # onward -- confirm against the LR policy implementation.
  LRS: [1, 0.1, 0.01]
  STEPS: [0, 20, 30]
  MAX_EPOCH: 35
  MOMENTUM: 0.9
  WEIGHT_DECAY: 5.0e-2
  WARMUP_EPOCHS: 0.0
  OPTIMIZING_METHOD: adamw
  USE_MIXED_PRECISION: True
  SMOOTHING: 0.2

SLOWFAST:
  ALPHA: 8

VIT:
  PATCH_SIZE: 16
  PATCH_SIZE_TEMP: 2
  CHANNELS: 3
  EMBED_DIM: 768
  DEPTH: 12
  NUM_HEADS: 12
  MLP_RATIO: 4
  QKV_BIAS: True
  VIDEO_INPUT: True
  # Equals DATA.NUM_FRAMES / PATCH_SIZE_TEMP (16 / 2); presumably these must be
  # kept in sync -- confirm against the model code.
  TEMPORAL_RESOLUTION: 8
  USE_MLP: True
  DROP: 0.0
  POS_DROPOUT: 0.0
  DROP_PATH: 0.2
  IM_PRETRAINED: True
  HEAD_DROPOUT: 0.0
  HEAD_ACT: tanh
  PRETRAINED_WEIGHTS: vit_1k
  ATTN_LAYER: divided

MODEL:
  NUM_CLASSES: 174
  ARCH: slow
  MODEL_NAME: VisionTransformer
  LOSS_FUNC: cross_entropy

TEST:
  ENABLE: True
  DATASET: Ssv2
  BATCH_SIZE: 64
  NUM_ENSEMBLE_VIEWS: 1
  NUM_SPATIAL_CROPS: 3

DATA_LOADER:
  NUM_WORKERS: 4
  PIN_MEMORY: True

NUM_GPUS: 8
NUM_SHARDS: 4
RNG_SEED: 0
OUTPUT_DIR: .

TENSORBOARD:
  ENABLE: True