diff --git a/full_checkpoints/Model-B-16_Data-2B_Samples-13B_lr-1e-3_bs-88k/epoch_1.pt b/full_checkpoints/Model-B-16_Data-2B_Samples-13B_lr-1e-3_bs-88k/epoch_1.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bab4be13a36451dfbb10a3b86def1776ef268400
--- /dev/null
+++ b/full_checkpoints/Model-B-16_Data-2B_Samples-13B_lr-1e-3_bs-88k/epoch_1.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e556733a87a7fb5b882d07142897e25a279e52f340cf2e737f672d698cac82c1
+size 1795822369
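Note: epoch_1.pt is committed as a Git LFS pointer, not the ~1.8 GB checkpoint itself; the three added lines record the pointer spec version, the SHA-256 of the actual object, and its byte size. After fetching the object (e.g. via git lfs pull), the download can be checked against the pointer. A minimal sketch in Python, assuming the file has been materialized at the path below:

# Verify a fetched Git LFS object against the pointer committed above.
import hashlib
import os

CKPT = "full_checkpoints/Model-B-16_Data-2B_Samples-13B_lr-1e-3_bs-88k/epoch_1.pt"
EXPECTED_OID = "e556733a87a7fb5b882d07142897e25a279e52f340cf2e737f672d698cac82c1"
EXPECTED_SIZE = 1795822369  # bytes, from the pointer's `size` line

assert os.path.getsize(CKPT) == EXPECTED_SIZE, "size mismatch"
sha = hashlib.sha256()
with open(CKPT, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        sha.update(chunk)
assert sha.hexdigest() == EXPECTED_OID, "sha256 mismatch"
print("epoch_1.pt matches its LFS pointer")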
diff --git a/full_checkpoints/Model-B-16_Data-2B_Samples-13B_lr-1e-3_bs-88k/epoch_1.pt.txt b/full_checkpoints/Model-B-16_Data-2B_Samples-13B_lr-1e-3_bs-88k/epoch_1.pt.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1a9d46cfa102d30e8dd18a9b60031c55b19d0bb3
--- /dev/null
+++ b/full_checkpoints/Model-B-16_Data-2B_Samples-13B_lr-1e-3_bs-88k/epoch_1.pt.txt
@@ -0,0 +1,432 @@
+2022-12-21,09:36:42 | INFO | Running with a single process. Device cuda:0.
+2022-12-21,09:36:42 | INFO | Loading ViT-B-16 model config.
+2022-12-21,09:36:43 | INFO | Loading pretrained ViT-B-16 weights (logs/cvpr_stability/ViT-B-16_2B_13b/checkpoints/epoch_1.pt).
+2022-12-21,09:36:52 | INFO | Model:
+2022-12-21,09:36:52 | INFO | CLIP(
+  (visual): VisualTransformer(
+    (conv1): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16), bias=False)
+    (ln_pre): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+    (transformer): Transformer(
+      (resblocks): ModuleList(
+        (0): ResidualAttentionBlock(
+          (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+          (attn): MultiheadAttention(
+            (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
+          )
+          (ln_attn): Identity()
+          (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+          (mlp): Sequential(
+            (c_fc): Linear(in_features=768, out_features=3072, bias=True)
+            (ln): Identity()
+            (gelu): GELU(approximate=none)
+            (c_proj): Linear(in_features=3072, out_features=768, bias=True)
+          )
+        )
+        (1): ResidualAttentionBlock(
+          (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+          (attn): MultiheadAttention(
+            (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
+          )
+          (ln_attn): Identity()
+          (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+          (mlp): Sequential(
+            (c_fc): Linear(in_features=768, out_features=3072, bias=True)
+            (ln): Identity()
+            (gelu): GELU(approximate=none)
+            (c_proj): Linear(in_features=3072, out_features=768, bias=True)
+          )
+        )
+        (2): ResidualAttentionBlock(
+          (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+          (attn): MultiheadAttention(
+            (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
+          )
+          (ln_attn): Identity()
+          (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+          (mlp): Sequential(
+            (c_fc): Linear(in_features=768, out_features=3072, bias=True)
+            (ln): Identity()
+            (gelu): GELU(approximate=none)
+            (c_proj): Linear(in_features=3072, out_features=768, bias=True)
+          )
+        )
+        (3): ResidualAttentionBlock(
+          (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+          (attn): MultiheadAttention(
+            (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
+          )
+          (ln_attn): Identity()
+          (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+          (mlp): Sequential(
+            (c_fc): Linear(in_features=768, out_features=3072, bias=True)
+            (ln): Identity()
+            (gelu): GELU(approximate=none)
+            (c_proj): Linear(in_features=3072, out_features=768, bias=True)
+          )
+        )
+        (4): ResidualAttentionBlock(
+          (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+          (attn): MultiheadAttention(
+            (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
+          )
+          (ln_attn): Identity()
+          (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+          (mlp): Sequential(
+            (c_fc): Linear(in_features=768, out_features=3072, bias=True)
+            (ln): Identity()
+            (gelu): GELU(approximate=none)
+            (c_proj): Linear(in_features=3072, out_features=768, bias=True)
+          )
+        )
+        (5): ResidualAttentionBlock(
+          (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+          (attn): MultiheadAttention(
+            (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
+          )
+          (ln_attn): Identity()
+          (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+          (mlp): Sequential(
+            (c_fc): Linear(in_features=768, out_features=3072, bias=True)
+            (ln): Identity()
+            (gelu): GELU(approximate=none)
+            (c_proj): Linear(in_features=3072, out_features=768, bias=True)
+          )
+        )
+        (6): ResidualAttentionBlock(
+          (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+          (attn): MultiheadAttention(
+            (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
+          )
+          (ln_attn): Identity()
+          (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+          (mlp): Sequential(
+            (c_fc): Linear(in_features=768, out_features=3072, bias=True)
+            (ln): Identity()
+            (gelu): GELU(approximate=none)
+            (c_proj): Linear(in_features=3072, out_features=768, bias=True)
+          )
+        )
+        (7): ResidualAttentionBlock(
+          (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+          (attn): MultiheadAttention(
+            (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
+          )
+          (ln_attn): Identity()
+          (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+          (mlp): Sequential(
+            (c_fc): Linear(in_features=768, out_features=3072, bias=True)
+            (ln): Identity()
+            (gelu): GELU(approximate=none)
+            (c_proj): Linear(in_features=3072, out_features=768, bias=True)
+          )
+        )
+        (8): ResidualAttentionBlock(
+          (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+          (attn): MultiheadAttention(
+            (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
+          )
+          (ln_attn): Identity()
+          (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+          (mlp): Sequential(
+            (c_fc): Linear(in_features=768, out_features=3072, bias=True)
+            (ln): Identity()
+            (gelu): GELU(approximate=none)
+            (c_proj): Linear(in_features=3072, out_features=768, bias=True)
+          )
+        )
+        (9): ResidualAttentionBlock(
+          (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+          (attn): MultiheadAttention(
+            (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
+          )
+          (ln_attn): Identity()
+          (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+          (mlp): Sequential(
+            (c_fc): Linear(in_features=768, out_features=3072, bias=True)
+            (ln): Identity()
+            (gelu): GELU(approximate=none)
+            (c_proj): Linear(in_features=3072, out_features=768, bias=True)
+          )
+        )
+        (10): ResidualAttentionBlock(
+          (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+          (attn): MultiheadAttention(
+            (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
+          )
+          (ln_attn): Identity()
+          (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+          (mlp): Sequential(
+            (c_fc): Linear(in_features=768, out_features=3072, bias=True)
+            (ln): Identity()
+            (gelu): GELU(approximate=none)
+            (c_proj): Linear(in_features=3072, out_features=768, bias=True)
+          )
+        )
+        (11): ResidualAttentionBlock(
+          (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+          (attn): MultiheadAttention(
+            (out_proj): NonDynamicallyQuantizableLinear(in_features=768, out_features=768, bias=True)
+          )
+          (ln_attn): Identity()
+          (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+          (mlp): Sequential(
+            (c_fc): Linear(in_features=768, out_features=3072, bias=True)
+            (ln): Identity()
+            (gelu): GELU(approximate=none)
+            (c_proj): Linear(in_features=3072, out_features=768, bias=True)
+          )
+        )
+      )
+    )
+    (ln_post): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
+  )
+  (transformer): Transformer(
+    (resblocks): ModuleList(
+      (0): ResidualAttentionBlock(
+        (ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
+        (attn): MultiheadAttention(
+          (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
+        )
+        (ln_attn): Identity()
+        (ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
+        (mlp): Sequential(
+          (c_fc): Linear(in_features=512, out_features=2048, bias=True)
+          (ln): Identity()
+          (gelu): GELU(approximate=none)
+          (c_proj): Linear(in_features=2048, out_features=512, bias=True)
+        )
+      )
+      (1): ResidualAttentionBlock(
+        (ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
+        (attn): MultiheadAttention(
+          (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
+        )
+        (ln_attn): Identity()
+        (ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
+        (mlp): Sequential(
+          (c_fc): Linear(in_features=512, out_features=2048, bias=True)
+          (ln): Identity()
+          (gelu): GELU(approximate=none)
+          (c_proj): Linear(in_features=2048, out_features=512, bias=True)
+        )
+      )
+      (2): ResidualAttentionBlock(
+        (ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
+        (attn): MultiheadAttention(
+          (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
+        )
+        (ln_attn): Identity()
+        (ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
+        (mlp): Sequential(
+          (c_fc): Linear(in_features=512, out_features=2048, bias=True)
+          (ln): Identity()
+          (gelu): GELU(approximate=none)
+          (c_proj): Linear(in_features=2048, out_features=512, bias=True)
+        )
+      )
+      (3): ResidualAttentionBlock(
+        (ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
+        (attn): MultiheadAttention(
+          (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
+        )
+        (ln_attn): Identity()
+        (ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
+        (mlp): Sequential(
+          (c_fc): Linear(in_features=512, out_features=2048, bias=True)
+          (ln): Identity()
+          (gelu): GELU(approximate=none)
+          (c_proj): Linear(in_features=2048, out_features=512, bias=True)
+        )
+      )
+      (4): ResidualAttentionBlock(
+        (ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
+        (attn): MultiheadAttention(
+          (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
+        )
+        (ln_attn): Identity()
+        (ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
+        (mlp): Sequential(
+          (c_fc): Linear(in_features=512, out_features=2048, bias=True)
+          (ln): Identity()
+          (gelu): GELU(approximate=none)
+          (c_proj): Linear(in_features=2048, out_features=512, bias=True)
+        )
+      )
+      (5): ResidualAttentionBlock(
+        (ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
+        (attn): MultiheadAttention(
+          (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
+        )
+        (ln_attn): Identity()
+        (ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
+        (mlp): Sequential(
+          (c_fc): Linear(in_features=512, out_features=2048, bias=True)
+          (ln): Identity()
+          (gelu): GELU(approximate=none)
+          (c_proj): Linear(in_features=2048, out_features=512, bias=True)
+        )
+      )
+      (6): ResidualAttentionBlock(
+        (ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
+        (attn): MultiheadAttention(
+          (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
+        )
+        (ln_attn): Identity()
+        (ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
+        (mlp): Sequential(
+          (c_fc): Linear(in_features=512, out_features=2048, bias=True)
+          (ln): Identity()
+          (gelu): GELU(approximate=none)
+          (c_proj): Linear(in_features=2048, out_features=512, bias=True)
+        )
+      )
+      (7): ResidualAttentionBlock(
+        (ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
+        (attn): MultiheadAttention(
+          (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
+        )
+        (ln_attn): Identity()
+        (ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
+        (mlp): Sequential(
+          (c_fc): Linear(in_features=512, out_features=2048, bias=True)
+          (ln): Identity()
+          (gelu): GELU(approximate=none)
+          (c_proj): Linear(in_features=2048, out_features=512, bias=True)
+        )
+      )
+      (8): ResidualAttentionBlock(
+        (ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
+        (attn): MultiheadAttention(
+          (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
+        )
+        (ln_attn): Identity()
+        (ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
+        (mlp): Sequential(
+          (c_fc): Linear(in_features=512, out_features=2048, bias=True)
+          (ln): Identity()
+          (gelu): GELU(approximate=none)
+          (c_proj): Linear(in_features=2048, out_features=512, bias=True)
+        )
+      )
+      (9): ResidualAttentionBlock(
+        (ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
+        (attn): MultiheadAttention(
+          (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
+        )
+        (ln_attn): Identity()
+        (ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
+        (mlp): Sequential(
+          (c_fc): Linear(in_features=512, out_features=2048, bias=True)
+          (ln): Identity()
+          (gelu): GELU(approximate=none)
+          (c_proj): Linear(in_features=2048, out_features=512, bias=True)
+        )
+      )
+      (10): ResidualAttentionBlock(
+        (ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
+        (attn): MultiheadAttention(
+          (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
+        )
+        (ln_attn): Identity()
+        (ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
+        (mlp): Sequential(
+          (c_fc): Linear(in_features=512, out_features=2048, bias=True)
+          (ln): Identity()
+          (gelu): GELU(approximate=none)
+          (c_proj): Linear(in_features=2048, out_features=512, bias=True)
+        )
+      )
+      (11): ResidualAttentionBlock(
+        (ln_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
+        (attn): MultiheadAttention(
+          (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
+        )
+        (ln_attn): Identity()
+        (ln_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
+        (mlp): Sequential(
+          (c_fc): Linear(in_features=512, out_features=2048, bias=True)
+          (ln): Identity()
+          (gelu): GELU(approximate=none)
+          (c_proj): Linear(in_features=2048, out_features=512, bias=True)
+        )
+      )
+    )
+  )
+  (token_embedding): Embedding(49408, 512)
+  (ln_final): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
+)
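The repr above is a standard CLIP ViT-B/16: a 12-layer, width-768 vision tower over 16x16 patches, and a 12-layer, width-512 text tower with a 49408-token vocabulary. A minimal loading sketch with open_clip, assuming a release contemporary with this log in which the pretrained argument accepts a local checkpoint path:

# Instantiate the ViT-B-16 CLIP printed above and load the released weights.
# Assumes `pip install open_clip_torch` and that the LFS object was pulled.
import torch
import open_clip

model, _, preprocess = open_clip.create_model_and_transforms(
    "ViT-B-16",
    pretrained="full_checkpoints/Model-B-16_Data-2B_Samples-13B_lr-1e-3_bs-88k/epoch_1.pt",
)
model.eval()

with torch.no_grad():
    text = open_clip.tokenize(["a photo of a dog", "a photo of a cat"])
    text_features = model.encode_text(text)  # (2, 512) text embeddings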
+2022-12-21,09:36:52 | INFO | Params:
+2022-12-21,09:36:52 | INFO |   batch_size: 256
+2022-12-21,09:36:52 | INFO |   beta1: 0.9
+2022-12-21,09:36:52 | INFO |   beta2: 0.98
+2022-12-21,09:36:52 | INFO |   checkpoint_path: logs/cvpr_stability/ViT-B-16_2B_13b/checkpoints/eval/epoch_1.pt/checkpoints
+2022-12-21,09:36:52 | INFO |   copy_codebase: False
+2022-12-21,09:36:52 | INFO |   csv_caption_key: title
+2022-12-21,09:36:52 | INFO |   csv_img_key: filepath
+2022-12-21,09:36:52 | INFO |   csv_separator: 
+2022-12-21,09:36:52 | INFO |   dataset_resampled: False
+2022-12-21,09:36:52 | INFO |   dataset_type: auto
+2022-12-21,09:36:52 | INFO |   ddp_static_graph: False
+2022-12-21,09:36:52 | INFO |   debug: False
+2022-12-21,09:36:52 | INFO |   device: cuda:0
+2022-12-21,09:36:52 | INFO |   dist_backend: nccl
+2022-12-21,09:36:52 | INFO |   dist_url: env://
+2022-12-21,09:36:52 | INFO |   distributed: False
+2022-12-21,09:36:52 | INFO |   epochs: 32
+2022-12-21,09:36:52 | INFO |   eps: 1e-06
+2022-12-21,09:36:52 | INFO |   force_quick_gelu: False
+2022-12-21,09:36:52 | INFO |   gather_with_grad: False
+2022-12-21,09:36:52 | INFO |   grad_checkpointing: False
+2022-12-21,09:36:52 | INFO |   horovod: False
+2022-12-21,09:36:52 | INFO |   image_mean: None
+2022-12-21,09:36:52 | INFO |   image_std: None
+2022-12-21,09:36:52 | INFO |   imagenet_v2: None
+2022-12-21,09:36:52 | INFO |   imagenet_val: /p/fastdata/mmlaion/imagenet_val
+2022-12-21,09:36:52 | INFO |   local_loss: False
+2022-12-21,09:36:52 | INFO |   local_rank: 0
+2022-12-21,09:36:52 | INFO |   lock_image: False
+2022-12-21,09:36:52 | INFO |   lock_image_freeze_bn_stats: False
+2022-12-21,09:36:52 | INFO |   lock_image_unlocked_groups: 0
+2022-12-21,09:36:52 | INFO |   log_level: 20
+2022-12-21,09:36:52 | INFO |   log_local: False
+2022-12-21,09:36:52 | INFO |   log_path: logs/cvpr_stability/ViT-B-16_2B_13b/checkpoints/eval/epoch_1.pt/out.log
+2022-12-21,09:36:52 | INFO |   logs: logs/cvpr_stability/ViT-B-16_2B_13b/checkpoints/eval
+2022-12-21,09:36:52 | INFO |   lr: 0.0005
+2022-12-21,09:36:52 | INFO |   model: ViT-B-16
+2022-12-21,09:36:52 | INFO |   name: epoch_1.pt
+2022-12-21,09:36:52 | INFO |   no_set_device_rank: False
+2022-12-21,09:36:52 | INFO |   norm_gradient_clip: None
+2022-12-21,09:36:52 | INFO |   precision: amp_bfloat16
+2022-12-21,09:36:52 | INFO |   pretrained: logs/cvpr_stability/ViT-B-16_2B_13b/checkpoints/epoch_1.pt
+2022-12-21,09:36:52 | INFO |   pretrained_image: False
+2022-12-21,09:36:52 | INFO |   rank: 0
+2022-12-21,09:36:52 | INFO |   report_to: 
+2022-12-21,09:36:52 | INFO |   resume: None
+2022-12-21,09:36:52 | INFO |   save_frequency: 1
+2022-12-21,09:36:52 | INFO |   save_most_recent: False
+2022-12-21,09:36:52 | INFO |   seed: 0
+2022-12-21,09:36:52 | INFO |   skip_scheduler: False
+2022-12-21,09:36:52 | INFO |   tensorboard: False
+2022-12-21,09:36:52 | INFO |   tensorboard_path: 
+2022-12-21,09:36:52 | INFO |   torchscript: False
+2022-12-21,09:36:52 | INFO |   trace: False
+2022-12-21,09:36:52 | INFO |   train_data: None
+2022-12-21,09:36:52 | INFO |   train_num_samples: None
+2022-12-21,09:36:52 | INFO |   use_bn_sync: False
+2022-12-21,09:36:52 | INFO |   val_data: None
+2022-12-21,09:36:52 | INFO |   val_frequency: 1
+2022-12-21,09:36:52 | INFO |   val_num_samples: None
+2022-12-21,09:36:52 | INFO |   wandb: False
+2022-12-21,09:36:52 | INFO |   wandb_notes: 
+2022-12-21,09:36:52 | INFO |   warmup: 10000
+2022-12-21,09:36:52 | INFO |   wd: 0.2
+2022-12-21,09:36:52 | INFO |   workers: 1
+2022-12-21,09:36:52 | INFO |   world_size: 1
+2022-12-21,09:36:52 | INFO |   zeroshot_frequency: 2
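This dump is the argparse namespace of open_clip's training entry point, run here in evaluation-only mode (train_data: None, with imagenet_val set). A hedged reconstruction of the invocation; the flag spellings are assumptions inferred from the parameter names above, with underscores mapped to dashes:

# Re-run this zero-shot evaluation via open_clip's training entry point.
# Assumes the working directory is open_clip's src/ so that `training.main`
# is importable as a module; unlisted flags keep their logged defaults.
import subprocess
import sys

subprocess.run(
    [
        sys.executable, "-m", "training.main",
        "--model", "ViT-B-16",
        "--pretrained", "logs/cvpr_stability/ViT-B-16_2B_13b/checkpoints/epoch_1.pt",
        "--imagenet-val", "/p/fastdata/mmlaion/imagenet_val",
        "--batch-size", "256",
        "--precision", "amp_bfloat16",
        "--workers", "1",
        "--seed", "0",
    ],
    check=True,
)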
+2022-12-21,09:36:53 | INFO | Starting zero-shot imagenet.
+2022-12-21,09:36:53 | INFO | Building zero-shot classifier
+ 0%| | 0/1000 [00:00
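The committed log is truncated here, partway through embedding the 1000 ImageNet class names for the zero-shot classifier. Schematically, that step works as in the sketch below (an illustration, not open_clip's exact implementation: the real code iterates all 1000 classes with roughly 80 prompt templates):

# Build a zero-shot classifier: embed prompted class names, average over
# templates, L2-normalize; prediction is then a cosine-similarity argmax.
import torch
import open_clip

model, _, _ = open_clip.create_model_and_transforms("ViT-B-16")
templates = ["a photo of a {}.", "a blurry photo of a {}."]  # illustrative subset
classnames = ["tench", "goldfish", "great white shark"]      # first 3 of the 1000

with torch.no_grad():
    cols = []
    for name in classnames:
        tokens = open_clip.tokenize([t.format(name) for t in templates])
        emb = model.encode_text(tokens)
        emb = emb / emb.norm(dim=-1, keepdim=True)  # per-prompt unit vectors
        mean = emb.mean(dim=0)
        cols.append(mean / mean.norm())             # renormalized class embedding
    classifier = torch.stack(cols, dim=1)           # (embed_dim=512, num_classes)
# logits for an image: image_features @ classifier, then argmax over classes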