# Hyperparameters and config settings
EMBED_DIM = 256 # Size of token embeddings
NUM_HEADS = 8 # Number of attention heads
NUM_LAYERS = 4 # Number of transformer blocks
FF_DIM = 512 # Feedforward layer dimension
MAX_SEQ_LEN = 256 # Maximum sequence length
VOCAB_SIZE = 100 # Placeholder (will be overridden based on dataset)
ADAPTER_DIM = 32   # Bottleneck dimension for continual-learning adapters
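
# Illustrative only: a minimal sketch of how ADAPTER_DIM might wire into a
# bottleneck adapter for continual learning. Assumes PyTorch; the Adapter
# class below is a hypothetical example, not part of this repo.
import torch.nn as nn

class Adapter(nn.Module):
    """Bottleneck adapter: down-project, nonlinearity, up-project, plus residual."""
    def __init__(self, embed_dim: int = EMBED_DIM, adapter_dim: int = ADAPTER_DIM):
        super().__init__()
        self.down = nn.Linear(embed_dim, adapter_dim)  # 256 -> 32
        self.up = nn.Linear(adapter_dim, embed_dim)    # 32 -> 256
        self.act = nn.ReLU()

    def forward(self, x):
        # Residual connection keeps the frozen backbone's output intact
        # while the small adapter learns task-specific adjustments.
        return x + self.up(self.act(self.down(x)))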