from transformers import PretrainedConfig
class SimpleStories4MConfig(PretrainedConfig):
    """Configuration object for the ``simple_stories_4m`` model type.

    Stores the hyperparameters listed below as instance attributes and
    forwards any remaining keyword arguments to ``PretrainedConfig``.

    NOTE(review): the parameter descriptions are inferred from the names
    only; the model code that consumes them is not part of this file, so
    confirm them against that implementation.

    Args:
        vocab_size: Presumably the number of entries in the token vocabulary.
        block_size: Presumably the maximum sequence (context) length.
        n_embed: Presumably the embedding / hidden dimension.
        n_heads: Presumably the number of attention heads per layer.
        n_layers: Presumably the number of stacked layers.
        dropout: Presumably the dropout probability.
        **kwargs: Passed through unchanged to ``PretrainedConfig.__init__``.
    """

    # Identifier string for this configuration class.
    model_type = "simple_stories_4m"

    def __init__(
        self,
        vocab_size: int = 2048,
        block_size: int = 1080,
        n_embed: int = 256,
        n_heads: int = 2,
        n_layers: int = 4,
        dropout: float = 0.1,
        **kwargs,
    ) -> None:
        self.vocab_size = vocab_size
        self.block_size = block_size
        self.n_embed = n_embed
        self.n_heads = n_heads
        self.n_layers = n_layers
        self.dropout = dropout
        # The base initializer runs last, after the model-specific
        # attributes have been set on the instance.
        super().__init__(**kwargs)