File size: 1,386 Bytes
3340437
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# Top-level run configuration for the masked-diffusion protein LM (MDpLM).
MODE = "train" # train, ppl_eval, sample_eval
DIFFUSION = "absorbing_state"  # diffusion process variant
BACKBONE = "dit"  # denoiser architecture (see the `model` class below)
PARAMETERIZATION = "subs" # subs, d3pm, sedd
TIME_CONDITIONING = False  # whether the denoiser is conditioned on timestep
T = 1000  # presumably the number of diffusion timesteps — confirm against trainer
SUBS_MASKING = False
LATENT_DIM = 1280  # NOTE(review): matches model.hidden_size below; presumably the ESM embedding width — confirm
MODEL_NAME = 'facebook/esm2_t33_650M_UR50D'  # HuggingFace checkpoint id for the ESM-2 encoder

SEED = 1  # global RNG seed

class Loader:
    # Data-loading settings.
    BATCH_SIZE = 8  # samples per batch
    DATA_PATH = "/workspace/a03-sgoel/MDpLM/data"  # root directory of the dataset
    
class Sampling:
    # Generation / sampling settings.
    PREDICTOR = "ddpm_cache"  # analytic, ddpm, ddpm_cache (recommended)
    STEPS = 128  # number of sampling steps
    NOISE_REMOVAL = True  # presumably strips remaining noise at the final step — confirm in sampler
    STRIDE_LENGTH = 1
    NUM_STRIDES = 1
    NUM_SAMPLING_BATCHES = 2  # how many batches of samples to draw

class model:
    # Denoiser (DiT backbone) architecture settings.
    # NOTE(review): lowercase class name is inconsistent with the other
    # PascalCase config classes in this file; renaming would break callers,
    # so it is left as-is.
    hidden_size = 1280  # matches module-level LATENT_DIM
    cond_dim = 1280  # conditioning-vector width
    n_heads = 8  # attention heads per block
    n_blocks = 2  # number of transformer blocks
    dropout = 0.5  # NOTE(review): 0.5 is unusually high for a transformer — confirm intentional
    length = 512  # presumably the max sequence length — confirm against data pipeline

class Training:
    """Trainer hyperparameters (gradient schedule, precision, checkpointing)."""
    EMA = 0.9999  # exponential-moving-average decay for model weights
    ANTITHETIC_SAMPLING = True
    SAMPLING_EPS = 1e-3
    ACCUMULATE_GRAD_BATCHES = 2  # gradient accumulation steps
    NUM_EPOCHS = 10
    GRADIENT_CLIP_VAL = 1.0
    PRECISION = 'bf16'
    # BUG FIX: "1,000,000" parsed as the tuple (1, 0, 0), not an int.
    # Use an underscore-separated integer literal instead.
    MAX_STEPS = 1_000_000
    # BUG FIX: "LOG_EVERY_N_STEPS: 10" was a bare annotation, so the class
    # had no such attribute at runtime; make it a real assignment.
    LOG_EVERY_N_STEPS = 10
    GPUS = 2  # number of GPUs to train on
    SAVE_DIR = "/workspace/a03-sgoel/MDpLM/models"  # checkpoint output directory

class Eval:
    # Evaluation settings.
    CHECKPOINT_PATH = "/workspace/a03-sgoel/MDpLM/checkpoints"  # where checkpoints are read from
    DISABLE_EMA = False  # evaluate with EMA weights unless disabled
    COMPUTE_GENERATIVE_PERPLEXITY = False
    PERPLEXITY_BATCH_SIZE = 8
    GENERATE_SAMPLES = True  # also draw samples during evaluation

class Optim:
    # Optimizer hyperparameters — presumably an Adam-family optimizer
    # (BETA1/BETA2/EPS naming); confirm in the trainer.
    LR = 1e-4  # learning rate
    BETA1 = 0.9
    BETA2 = 0.999
    EPS = 1e-8


# TODO: complete the Wandb configuration once training actually works
class Wandb:
    # Weights & Biases logging identifiers.
    PROJECT = "MDpLM_shrey_test"  # W&B project name
    GROUP = "programmablebio"  # W&B run group