This view is limited to 50 files because it contains too many changes.  See the raw diff here.
Files changed (50) hide show
  1. __init__.py +2 -0
  2. config.json +1 -1
  3. configuration_grok.py +3 -3
  4. pytorch_model-00001-of-00019.bin → model-00001-of-00129.safetensors +2 -2
  5. pytorch_model-00002-of-00019.bin → model-00002-of-00129.safetensors +2 -2
  6. pytorch_model-00003-of-00019.bin → model-00003-of-00129.safetensors +2 -2
  7. pytorch_model-00004-of-00019.bin → model-00004-of-00129.safetensors +2 -2
  8. model-00005-of-00129.safetensors +3 -0
  9. model-00006-of-00129.safetensors +3 -0
  10. model-00007-of-00129.safetensors +3 -0
  11. model-00008-of-00129.safetensors +3 -0
  12. model-00009-of-00129.safetensors +3 -0
  13. model-00010-of-00129.safetensors +3 -0
  14. model-00011-of-00129.safetensors +3 -0
  15. model-00012-of-00129.safetensors +3 -0
  16. model-00013-of-00129.safetensors +3 -0
  17. model-00014-of-00129.safetensors +3 -0
  18. model-00015-of-00129.safetensors +3 -0
  19. model-00016-of-00129.safetensors +3 -0
  20. model-00017-of-00129.safetensors +3 -0
  21. model-00018-of-00129.safetensors +3 -0
  22. model-00019-of-00129.safetensors +3 -0
  23. model-00020-of-00129.safetensors +3 -0
  24. model-00021-of-00129.safetensors +3 -0
  25. model-00022-of-00129.safetensors +3 -0
  26. model-00023-of-00129.safetensors +3 -0
  27. model-00024-of-00129.safetensors +3 -0
  28. model-00025-of-00129.safetensors +3 -0
  29. model-00026-of-00129.safetensors +3 -0
  30. model-00027-of-00129.safetensors +3 -0
  31. model-00028-of-00129.safetensors +3 -0
  32. model-00029-of-00129.safetensors +3 -0
  33. model-00030-of-00129.safetensors +3 -0
  34. model-00031-of-00129.safetensors +3 -0
  35. model-00032-of-00129.safetensors +3 -0
  36. model-00033-of-00129.safetensors +3 -0
  37. model-00034-of-00129.safetensors +3 -0
  38. model-00035-of-00129.safetensors +3 -0
  39. model-00036-of-00129.safetensors +3 -0
  40. model-00037-of-00129.safetensors +3 -0
  41. model-00038-of-00129.safetensors +3 -0
  42. model-00039-of-00129.safetensors +3 -0
  43. model-00040-of-00129.safetensors +3 -0
  44. model-00041-of-00129.safetensors +3 -0
  45. model-00042-of-00129.safetensors +3 -0
  46. model-00043-of-00129.safetensors +3 -0
  47. model-00044-of-00129.safetensors +3 -0
  48. model-00045-of-00129.safetensors +3 -0
  49. model-00046-of-00129.safetensors +3 -0
  50. model-00047-of-00129.safetensors +3 -0
__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ from .configuration_grok import *
2
+ from .modeling_grok import *
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "hf/",
3
  "architectures": [
4
  "GrokForCausalLM"
5
  ],
 
1
  {
2
+ "_name_or_path": "grok-ztmp",
3
  "architectures": [
4
  "GrokForCausalLM"
5
  ],
configuration_grok.py CHANGED
@@ -96,8 +96,8 @@ class GrokConfig(PretrainedConfig):
96
  num_hidden_layers=64,
97
  num_attention_heads=48,
98
  num_key_value_heads=8,
99
- hidden_act="silu",
100
- max_position_embeddings=4096,
101
  initializer_range=0.02,
102
  rms_norm_eps=1e-5,
103
  use_cache=True,
@@ -105,7 +105,7 @@ class GrokConfig(PretrainedConfig):
105
  bos_token_id=1,
106
  eos_token_id=2,
107
  tie_word_embeddings=True,
108
- rope_theta=1e5,
109
  attention_dropout=0.0,
110
  num_experts_per_tok=2,
111
  num_local_experts=8,
 
96
  num_hidden_layers=64,
97
  num_attention_heads=48,
98
  num_key_value_heads=8,
99
+ hidden_act="gelu_new",
100
+ max_position_embeddings=8192,
101
  initializer_range=0.02,
102
  rms_norm_eps=1e-5,
103
  use_cache=True,
 
105
  bos_token_id=1,
106
  eos_token_id=2,
107
  tie_word_embeddings=True,
108
+ rope_theta=1e4,
109
  attention_dropout=0.0,
110
  num_experts_per_tok=2,
111
  num_local_experts=8,
pytorch_model-00001-of-00019.bin → model-00001-of-00129.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:49bf50ef3d70afd8e7d199223f8472bc7296f2407ec2f43809ca62276ebaa9a9
3
- size 24688084712
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:739afdc4fe7c736298f1056131da2a27d5e08e2d9d3654470866d11339e71d08
3
+ size 4605544112
pytorch_model-00002-of-00019.bin → model-00002-of-00129.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cd2e170b63a6532af4f1e7cde4fb51c34bd810aaa0ebbf33345f36f1d08fe9ac
3
- size 35962479774
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2da2697d8179839e2c4eab0df9f9af6bfe1ac0f022db6ee942e8e472d3b72e2b
3
+ size 4831839776
pytorch_model-00003-of-00019.bin → model-00003-of-00129.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:abb2c6f340f4d0f122c92479f0c2e9324d8f415af1bed4a645e6a374dd7693c3
3
- size 32917463282
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c619d81a1e82abfaf498cd6cb4fefd509efccaca6e927fc62cc5060d9f6d6361
3
+ size 4605643240
pytorch_model-00004-of-00019.bin → model-00004-of-00129.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b9fdff65e71dc305f23181d18ea28b35c5b740f3d624a6d7e92495ccb7cb7a07
3
- size 35962479902
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f77124ab7ec6a9fce4a42eef6908136fd6ef684a78bb157819d6215bdee9790
3
+ size 4831839776
model-00005-of-00129.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34babd1fbb246f160ee65e8d66963fa6dabe7d67bd6442f6a3b7e1120908d5e8
3
+ size 4605643248
model-00006-of-00129.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9741b2416238bac11d893af8c13e5f7bd824cfbbda7e53806e2e7432a42bd7c
3
+ size 4831839776
model-00007-of-00129.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b119ac6352864065f756bdd989b87a365dcedbfbd2285b306b2a81ef0a194901
3
+ size 4605643240
model-00008-of-00129.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:099d554f67f605c85f3a9e90cc6d781b252c46b2a4cc6c13fa3573ac13dd5671
3
+ size 4831839776
model-00009-of-00129.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11a8593548b70f975b3b08faec2c0a51ee2426a40e3590dcc20dd3b103075b39
3
+ size 4605643240
model-00010-of-00129.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e450cb6a0a6d23b2cc29cba9b0bdcdff2bd088fad8a5dc7064276640f87de16
3
+ size 4831839776
model-00011-of-00129.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f43f5d81e191360353b909503bb5257872884dc810c09331ad3ffcdd3e896356
3
+ size 4605643248
model-00012-of-00129.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4e3e6ecd078f12184f9b9d2369215cbaf6d9dbcd47f556f9e0c720efb1c848b
3
+ size 4831839776
model-00013-of-00129.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eac368881aedb951ee3eae29809f3c2bd8440f9ce7e76d74f2ff43c06377a1e7
3
+ size 4605643240
model-00014-of-00129.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16ff28e12db93b60248c80d47703b75a8ae8cc99fdf6dc28e2972ccda76aab8c
3
+ size 4831839776
model-00015-of-00129.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:772707d8d2b363e9fe2ece49d26b099f57d7d3be0d0c63b85d7d399317391af6
3
+ size 4605643240
model-00016-of-00129.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8d31ef102baf09a2b05f466eb411333070070c8cc6e0e878c503ad53bb3a26d
3
+ size 4831839776
model-00017-of-00129.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14b32ffd8b1fb73af472141f2144b8c2832a0d05bd8c3c7719268082d3126648
3
+ size 4932602224
model-00018-of-00129.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e16a2317e13ff084cf5db91213c073d1228aff1c05eb5e05a5073602eee1b75
3
+ size 4907534104
model-00019-of-00129.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4e05340708865242537b03a520a934f91dac42cadca279420d1510ffbbbf714
3
+ size 4932602224
model-00020-of-00129.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f0469775fc7cff1e86c415d04a130ee6e5d279ac40ff65971b8062328036cad
3
+ size 4907534104
model-00021-of-00129.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c3bcac033018cd66c6b61f4c1694f5bb3f8f82908482c1d531ccb4e7176a48c
3
+ size 4932602216
model-00022-of-00129.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f39177735f9e11cc0547eaf56af0b7d2977864d7caae927bbb29aca14aa63ed
3
+ size 4907534120
model-00023-of-00129.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13661a056515aaada2be4620a02784080d7fbbbc79c190b19b6aebe7613951b1
3
+ size 4932602248
model-00024-of-00129.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:755229210823b40a3241880975d6fc51771b7cc3c36e54219fb44dbed48cb2c2
3
+ size 4907534120
model-00025-of-00129.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8119c0587696df0b7a9c74758df30587657bd1cd766d105020c590c9bd035c0
3
+ size 4932602248
model-00026-of-00129.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c99121391ec30bb015010fbefc321f5fb7ca3bb58327261fb51e92edde042198
3
+ size 4907534120
model-00027-of-00129.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6d3e3effb57f725f3d078e2fffdca481bcd8bd9f0719bd88fc6e00ff60db9de
3
+ size 4932602248
model-00028-of-00129.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6924fd54f346d241b7e747d6118fac999acb8ed4253b5d156555ce28df9c9e2
3
+ size 4907534120
model-00029-of-00129.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54c8fcf7dfe74ffe480cc2673485a1ffe3c21dea79d109a27294118d511221db
3
+ size 4932602248
model-00030-of-00129.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a5da644297e20c4d3789b83ad856b0529940352e29f5b53999aaa5ad420b780
3
+ size 4907534120
model-00031-of-00129.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f0263c63393d929a3088eafaf071e30d53e4786c23e37af955e910693f0a482
3
+ size 4932602248
model-00032-of-00129.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f25b07be35d6b22d2cefddef443f020ecdb6f68a29fa0d0821c68b5ba471f126
3
+ size 4907534120
model-00033-of-00129.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b0ff57efd777bf516e8501f2fe41afac4f5b7cce723c246586238c33b9ee8f0
3
+ size 4932602248
model-00034-of-00129.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd742e0639c16b9e79b1360a23d8823e661e1e36ac21581dc4da9bca1f482e03
3
+ size 4907534120
model-00035-of-00129.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8646bc069c02ce86375f50572bcbcb65299a8f5a7c34be1164d983e9478e5b3
3
+ size 4932602248
model-00036-of-00129.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e376e4fb2c76cf03c335a7904872306935c76b3b69af8e6d4f6ae9a2fd87b6cf
3
+ size 4907534120
model-00037-of-00129.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55c188265cc7cc166ef111d1e92a4392cc2ab993d84046bae390bfdc5be28fb2
3
+ size 4932602248
model-00038-of-00129.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08309696ef312a305b8f25bb19f2781f086d1bd9b847a5232e1357482fb015bd
3
+ size 4907534120
model-00039-of-00129.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25bf348553f9229c981a48155bcccece55e9d7e7771de6ead7e512173c19a8bb
3
+ size 4932602248
model-00040-of-00129.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08c4f2b7b66e2b58060f3e42bca18ca397e61d806137f88448219e924f5b3a97
3
+ size 4907534120
model-00041-of-00129.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26c6502621af02d5354b715f0694021e26d9691a40b410d4a64899619993db4a
3
+ size 4932602248
model-00042-of-00129.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f61c9740cc478a478ce13122d71a91c426128d61b10effa0d7272b0b025a969a
3
+ size 4907534120
model-00043-of-00129.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6f9b9088299b99cd7fda366c803886bebcd58157d74456a0ae907c96199073c
3
+ size 4932602248
model-00044-of-00129.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd1e334af06cab85e057b1be0ff963070991c36742a1ae3787c8465a9d22bce6
3
+ size 4907534120
model-00045-of-00129.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:312a80c2561d38b4664af2e8d33c895557de8d4936b24ebae611dc5ed926d191
3
+ size 4932602248
model-00046-of-00129.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d83ef5eb39b1b64ad6130164a0146e60ccf380ed09d9ecd92a810ff420c7aa42
3
+ size 4907534120
model-00047-of-00129.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f59cfe6943a562075ea9f4b8ffff6fbef53dc1df462b5c14889b531024770269
3
+ size 4932602248