from transformers import PretrainedConfig, GPT2Config
from typing import List


class GPTOptimConfig(GPT2Config):
    model_type = "gpt_optimized"

    def __init__(
        self,
        block_size: int = 1024,   # max sequence length
        vocab_size: int = 50257,  # number of tokens: 50,000 BPE merges + 256 byte tokens + 1 <|endoftext|> token
        n_layer: int = 16,        # number of layers
        n_head: int = 16,         # number of attention heads
        n_embd: int = 1024,       # embedding dimension
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.block_size = block_size
        self.vocab_size = vocab_size
        self.n_layer = n_layer
        self.n_head = n_head
        self.n_embd = n_embd
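

# A minimal usage sketch (assumption, not part of the original): register the custom
# model_type with AutoConfig, save the config, and reload it by type. The directory
# name "gpt_optimized_config" is hypothetical.
if __name__ == "__main__":
    from transformers import AutoConfig

    # Map the "gpt_optimized" model_type to this config class
    AutoConfig.register("gpt_optimized", GPTOptimConfig)

    config = GPTOptimConfig(n_layer=16, n_head=16, n_embd=1024)
    config.save_pretrained("gpt_optimized_config")        # writes config.json
    reloaded = AutoConfig.from_pretrained("gpt_optimized_config")
    print(type(reloaded).__name__, reloaded.model_type)   # GPTOptimConfig gpt_optimized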