AdamF92 committed on
Commit
f4c9567
·
verified ·
1 Parent(s): fcfc956

Epoch 0 - Val loss 1.8263

Browse files
Files changed (2) hide show
  1. config.json +53 -0
  2. model.safetensors +3 -0
config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "att_dropout": 0.0,
3
+ "att_heads": 16,
4
+ "embed_dim": 1024,
5
+ "ff_dim": 3072,
6
+ "ff_dropout": 0.0,
7
+ "head_dim": 128,
8
+ "kv_heads": 8,
9
+ "layer_types": [
10
+ "stateless",
11
+ "stateful",
12
+ "stateless",
13
+ "stateful",
14
+ "stateless",
15
+ "stateful",
16
+ "stateless",
17
+ "stateful",
18
+ "stateless",
19
+ "stateful",
20
+ "stateless",
21
+ "stateful",
22
+ "stateless",
23
+ "stateful",
24
+ "stateless",
25
+ "stateful",
26
+ "stateless",
27
+ "stateful",
28
+ "stateless",
29
+ "stateful",
30
+ "stateless",
31
+ "stateful",
32
+ "stateless",
33
+ "stateful",
34
+ "stateless",
35
+ "stateful",
36
+ "stateless",
37
+ "stateful"
38
+ ],
39
+ "memory_gate_type": "linear",
40
+ "num_layers": 28,
41
+ "padding_idx": 151669,
42
+ "rope_base": 1000000,
43
+ "seq_len": 8192,
44
+ "skip_stm": false,
45
+ "stm_batch_size": 1,
46
+ "stm_size": 4096,
47
+ "tie_embeddings": true,
48
+ "training_cache": true,
49
+ "use_flash_attention": false,
50
+ "use_memory_gate": true,
51
+ "use_separate_memory_projections": true,
52
+ "vocab_size": 151936
53
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e76116ab5cf98ce7f30462900922935665ebc9f41b3795546dd35461d6a9c4d6
3
+ size 1369381832