# File size: 821 Bytes
# b442155
# Dataset selection for this configuration.
dataset:
  dataset: imagenet
  # Same value as stage1.hparams.resolution in this file.
  # Presumably pixels per side -- confirm against the data loader.
  image_resolution: 256

# First-stage model settings (type: vqgan).
stage1:
  type: vqgan
  # Same value as hparams.z_channels below.
  embed_dim: 256
  # Same value as stage2.vocab_size_img in this file.
  n_embed: 16384
  hparams:
    double_z: false
    z_channels: 256
    # Same value as dataset.image_resolution in this file.
    resolution: 256
    in_channels: 3
    out_ch: 3
    ch: 128
    # Five entries; presumably one channel multiplier per resolution
    # level -- confirm against the model code.
    ch_mult: [1, 1, 2, 2, 4]
    num_res_blocks: 2
    attn_resolutions: [16]
    # Presumably a dropout probability (0.0 = disabled) -- confirm.
    pdrop: 0.0

# Second-stage model settings (type: igpt).
stage2:
  type: igpt
  # Presumably enables class conditioning (see hparams.n_classes) -- confirm.
  use_cls_cond: true
  # Same value as stage1.n_embed in this file.
  vocab_size_img: 16384
  hparams:
    # 1536 / 24 heads = 64; presumably the per-head width -- confirm.
    embed_dim: 1536
    n_layers: 42
    n_heads: 24
    # Same value as n_layers.
    n_dense_layers: 42
    ctx_len_img: 256
    # The three *_pdrop values are all 0.0; presumably dropout
    # probabilities (0.0 = disabled) -- confirm.
    embd_pdrop: 0.0
    resid_pdrop: 0.0
    attn_pdrop: 0.0
    mlp_bias: true
    attn_bias: true
    gelu_use_approx: false
    n_classes: 1000

# Optimizer settings.
optimizer:
  opt_type: adamW
  # Written with an explicit mantissa dot: YAML 1.1 loaders such as PyYAML
  # resolve a dotless exponent form (e.g. 1e-4) as a string, not a float.
  base_lr: 1.0e-4
  weight_decay: 0.0
  betas: [0.9, 0.95]
  grad_clip_norm: 4.0

# Run-level training settings.
experiment:
  local_batch_size: 2
  # 512 / local_batch_size (2) = 256; presumably covered by the number of
  # workers and/or gradient accumulation -- confirm against the trainer.
  total_batch_size: 512
  epochs: 8