# mbiswas's picture
# Upload 10 files
# b781107 verified
import torch
# ---------------------------------------------------------------------------
# Configuration constants.
# Everything below is a plain module-level value; DEVICE is the only one that
# is computed, and it is resolved once when this module is imported.
# ---------------------------------------------------------------------------

# --- Image input ---
IMAGE_SIZE = 512
PATCH_SIZE = 16
# Per-channel normalization constants; the values match the widely used
# ImageNet mean / standard deviation.
IMAGE_MEAN = [0.485, 0.456, 0.406]
IMAGE_STD = [0.229, 0.224, 0.225]
IMAGE_LOCATION = "./images/"

# --- Model dimensions ---
HIDDEN_DIM = 256
# NOTE(review): 1536 == (IMAGE_SIZE // PATCH_SIZE) ** 2 + TEXT_LENGTH
# (1024 + 512). Presumably intentional -- confirm before changing either one.
CONTEXT_LENGTH = 1536
TEXT_LENGTH = 512  # Max length for *target* sequence (coords)
PROMPT_LENGTH = 64  # Max length for *prompt* sequence (description) - adjust as needed
DROPOUT = 0.1
NUM_HEADS = 8
NUM_LAYERS = 12  # Keep moderate layers
SHARED_EMBED_DIM = 256  # Dimension for contrastive space
NUM_BINS = 32
MAX_POINTS = 10  # Maximum number of points per image to handle

# --- Runtime ---
DTYPE = torch.float32  # torch.bfloat16 created some instability, why?
# Use the GPU when PyTorch reports one, otherwise fall back to the CPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# --- Optimization ---
BATCH_SIZE = 16
# NOTE(review): if gradients are accumulated over this many batches, the
# effective batch size would be BATCH_SIZE * GRAD_ACCUMULATION_STEPS (256)
# -- verify against the training loop.
GRAD_ACCUMULATION_STEPS = 16
LEARNING_RATE = 1e-3  # Lower LR might be needed with contrastive loss
MAX_GRAD_NORM = 1.0

# --- Training loop constants ---
NUM_EPOCHS = 400  # Desired number of epochs
LOGGING_STEPS = 1  # Log every N optimization steps

# --- Loss weights ---
LAMBDA_CONTRASTIVE = 2  # Weight for contrastive loss - TUNE THIS
LAMBDA_REGRESSION = 2  # Works but noisy