broskicodes committed

Commit f3478a1
1 Parent(s): 09ebec9

Upload model
Files changed (4)
  1. config.json +18 -0
  2. config_4m.py +22 -0
  3. model.safetensors +3 -0
  4. model_4m.py +22 -0
config.json ADDED
@@ -0,0 +1,18 @@
+ {
+   "architectures": [
+     "SimpleStories4MModel"
+   ],
+   "auto_map": {
+     "AutoConfig": "config_4m.SimpleStories4MConfig",
+     "AutoModel": "model_4m.SimpleStories4MModel"
+   },
+   "block_size": 1080,
+   "dropout": 0.1,
+   "model_type": "simple_stories_4m",
+   "n_embed": 256,
+   "n_heads": 2,
+   "n_layers": 4,
+   "torch_dtype": "float32",
+   "transformers_version": "4.36.2",
+   "vocab_size": 2048
+ }
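
The auto_map entries are what let transformers resolve the custom classes at load time. A minimal loading sketch, assuming a hypothetical repo id of broskicodes/simple-stories-4M (substitute the repo's actual Hub path):

    from transformers import AutoConfig

    # Hypothetical repo id; replace with this repo's actual Hub path.
    repo_id = "broskicodes/simple-stories-4M"

    # trust_remote_code=True is required because auto_map points AutoConfig
    # at the custom config_4m.SimpleStories4MConfig class shipped in the repo.
    config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)
    print(config.n_embed, config.n_heads, config.n_layers)  # 256 2 4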
config_4m.py ADDED
@@ -0,0 +1,22 @@
+ from transformers import PretrainedConfig
+
+ class SimpleStories4MConfig(PretrainedConfig):
+     model_type = "simple_stories_4m"
+
+     def __init__(
+         self,
+         vocab_size: int = 2048,
+         block_size: int = 1080,
+         n_embed: int = 256,
+         n_heads: int = 2,
+         n_layers: int = 4,
+         dropout: float = 0.1,
+         **kwargs
+     ):
+         self.vocab_size = vocab_size
+         self.block_size = block_size
+         self.n_embed = n_embed
+         self.n_heads = n_heads
+         self.n_layers = n_layers
+         self.dropout = dropout
+         super().__init__(**kwargs)
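
Since the constructor defaults mirror config.json exactly, the config can also be built directly. A small sketch, assuming config_4m.py is on the local import path:

    from config_4m import SimpleStories4MConfig

    # A bare call reproduces the committed config.json values.
    config = SimpleStories4MConfig()
    assert config.vocab_size == 2048 and config.block_size == 1080

    # Extra kwargs flow through to PretrainedConfig via super().__init__.
    small = SimpleStories4MConfig(n_layers=2, dropout=0.0)
    print(small.to_json_string())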
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0e8c4a3f27258d9726c7d463899171c9ffe0b8f69de03f7fc3edf4d39806f403
+ size 55267264
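
This is a Git LFS pointer, not the weights themselves; the roughly 55 MB payload has to be fetched (e.g. with git lfs pull) before loading. A sketch for inspecting the downloaded checkpoint with the safetensors library:

    from safetensors import safe_open

    # Walk the checkpoint, reporting each tensor's name, shape, and dtype.
    total = 0
    with safe_open("model.safetensors", framework="pt", device="cpu") as f:
        for name in f.keys():
            tensor = f.get_tensor(name)
            total += tensor.numel()
            print(name, tuple(tensor.shape), tensor.dtype)
    print(f"total values: {total:,}")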
model_4m.py ADDED
@@ -0,0 +1,22 @@
+ from transformers import PreTrainedModel
+ from simple_stories_4m_model.config_4m import SimpleStories4MConfig
+ from simple_stories_4m_model.nano_gpt_model import NanoGPT
+
+ class SimpleStories4MModel(PreTrainedModel):
+     config_class = SimpleStories4MConfig
+
+     def __init__(self, config):
+         super().__init__(config)
+         hyperparameters = {
+             "vocab_size": config.vocab_size,
+             "block_size": config.block_size,
+             "n_embed": config.n_embed,
+             "n_heads": config.n_heads,
+             "n_layers": config.n_layers,
+             "dropout": config.dropout,
+
+         }
+         self.model = NanoGPT(hyperparameters)
+
+     def forward(self, tensor, targets=None):
+         return self.model(tensor, targets)
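
With auto_map binding AutoModel to this class, the commit is loadable end to end. A forward-pass sketch, reusing the hypothetical repo id from above; NanoGPT itself is not part of this diff, so its return value (commonly a (logits, loss) pair in nanoGPT-style code) is an assumption:

    import torch
    from transformers import AutoModel

    repo_id = "broskicodes/simple-stories-4M"  # hypothetical; adjust as needed

    # Fetches model_4m.py/config_4m.py from the repo and builds the class above.
    model = AutoModel.from_pretrained(repo_id, trust_remote_code=True)
    model.eval()

    # Dummy batch: token ids under vocab_size=2048, length within block_size=1080.
    tokens = torch.randint(0, 2048, (1, 32))
    with torch.no_grad():
        output = model(tokens)  # delegates to NanoGPT.forward(tensor, None)

One caveat worth noting: model_4m.py imports from the simple_stories_4m_model package rather than using relative imports, so remote loading only works if that package (including nano_gpt_model, which is not in this commit) is importable on the consumer's machine.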