File size: 649 Bytes
4e1467d 2896dec |
1 2 3 4 5 6 7 8 9 10 |
from dataclasses import dataclass  # required by the decorator below; harmless if already imported


@dataclass
class OsSoluConfig:
    """Bundle of model and training settings, each with a default value.

    Every field can be overridden positionally (in declaration order) or by
    keyword when constructing the config; omitted fields keep the defaults
    listed below.
    """

    # Hidden size of the model.
    d_model: int = 512
    # Vocabulary size of the input sequence.
    # TODO(review): original author was unsure about this value — confirm.
    vocab_size: int = 65536
    # Learning rate for the optimiser.
    learning_rate: float = 1e-3
    # Number of embeddings.
    # TODO(review): original author was unsure about this value — confirm.
    num_embeddings: int = 1024
    # Number of transformer blocks.
    num_blocks: int = 1
    # Probability of dropout.
    dropout: float = 0.1
    # Layer norm epsilon.
    ln_eps: float = 1e-3
    # Number of attention heads in each attention layer.
    num_heads: int = 4