mbiswas
/

pointing_models_from_scratch

Model card Files Files and versions

pointing_models_from_scratch / constants.py

mbiswas's picture

Upload 10 files

b781107 verified 5 months ago

history blame contribute delete

1.04 kB

	import torch

	# --- Image / patch geometry ---------------------------------------------
	IMAGE_SIZE = 512
	PATCH_SIZE = 16
	# Per-channel normalization statistics (three values each). These match the
	# widely used ImageNet mean/std.
	IMAGE_MEAN = [0.485, 0.456, 0.406]
	IMAGE_STD = [0.229, 0.224, 0.225]
	IMAGE_LOCATION = "./images/"

	# --- Sequence lengths ---------------------------------------------------
	# NOTE(review): 1536 == (IMAGE_SIZE // PATCH_SIZE) ** 2 + TEXT_LENGTH
	# (1024 + 512); confirm whether that relationship is intended and must be
	# kept in sync when any of those values change.
	CONTEXT_LENGTH = 1536
	TEXT_LENGTH = 512  # Max length for target sequence (coords)
	PROMPT_LENGTH = 64  # Max length for prompt sequence (description) - Adjust as needed
	MAX_POINTS = 10  # Maximum number of points per image to handle
	# Defined exactly once (an earlier revision assigned it twice).
	# NOTE(review): presumably the number of coordinate quantization bins -
	# confirm against the model code.
	NUM_BINS = 32

	# --- Model architecture -------------------------------------------------
	HIDDEN_DIM = 256
	SHARED_EMBED_DIM = 256  # Dimension for contrastive space
	NUM_HEADS = 8
	NUM_LAYERS = 12  # Keep moderate layers
	DROPOUT = 0.1

	# --- Optimization / runtime ---------------------------------------------
	BATCH_SIZE = 16
	GRAD_ACCUMULATION_STEPS = 16
	LEARNING_RATE = 1e-3  # Lower LR might be needed with contrastive loss
	DTYPE = torch.float32  # torch.bfloat16 created some instability, why?
	# Use the GPU when CUDA is available, otherwise fall back to the CPU.
	DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

	# ---- Training loop constants ----

	# Loss weighting.
	# Weight for contrastive loss - TUNE THIS
	LAMBDA_CONTRASTIVE = 2
	# Weight for the regression term; current value works but is noisy.
	LAMBDA_REGRESSION = 2

	# Schedule and logging.
	# Desired number of epochs.
	NUM_EPOCHS = 400
	# Log every N optimization steps.
	LOGGING_STEPS = 1

	# NOTE(review): presumably the gradient-norm clipping threshold - confirm
	# against the training loop.
	MAX_GRAD_NORM = 1.0