# Hyperparameters and config settings

EMBED_DIM = 256        # Size of token embeddings
NUM_HEADS = 8          # Number of attention heads
NUM_LAYERS = 4         # Number of transformer blocks
FF_DIM = 512           # Feedforward layer dimension
MAX_SEQ_LEN = 256      # Maximum sequence length
VOCAB_SIZE = 100       # Placeholder (will be overridden based on dataset)
ADAPTER_DIM = 32       # Bottleneck dimension of adapters used for continual learning
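
# --- Illustration (not part of the original config) ---
# A minimal sketch of how ADAPTER_DIM might be consumed: a bottleneck adapter
# (down-project, nonlinearity, up-project, residual) inserted into each
# transformer block for continual learning. The `Adapter` module below is an
# assumption for illustration, not the repository's actual implementation.

import torch
import torch.nn as nn

class Adapter(nn.Module):
    """Bottleneck adapter: EMBED_DIM -> ADAPTER_DIM -> EMBED_DIM with a residual."""
    def __init__(self, embed_dim: int = EMBED_DIM, adapter_dim: int = ADAPTER_DIM):
        super().__init__()
        self.down = nn.Linear(embed_dim, adapter_dim)  # 256 -> 32
        self.act = nn.GELU()
        self.up = nn.Linear(adapter_dim, embed_dim)    # 32 -> 256

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Residual connection preserves the frozen backbone's output when the
        # adapter's contribution is small (e.g. near-zero initialization).
        return x + self.up(self.act(self.down(x)))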