reymondzzz committed
Commit 54a31cf
Parent: df9a9e0
This view is limited to 50 files because the commit contains too many changes.
Files changed (50):
  1. .gitattributes +6 -0
  2. config.json +39 -0
  3. lm_head.weight +3 -0
  4. transformer.h.0.attn.c_attn.bias +3 -0
  5. transformer.h.0.attn.c_attn.g_idx +3 -0
  6. transformer.h.0.attn.c_attn.qweight +3 -0
  7. transformer.h.0.attn.c_attn.qzeros +3 -0
  8. transformer.h.0.attn.c_attn.scales +3 -0
  9. transformer.h.0.attn.c_proj.bias +3 -0
  10. transformer.h.0.attn.c_proj.g_idx +3 -0
  11. transformer.h.0.attn.c_proj.qweight +3 -0
  12. transformer.h.0.attn.c_proj.qzeros +3 -0
  13. transformer.h.0.attn.c_proj.scales +3 -0
  14. transformer.h.0.ln_1.bias +3 -0
  15. transformer.h.0.ln_1.weight +3 -0
  16. transformer.h.0.ln_2.bias +3 -0
  17. transformer.h.0.ln_2.weight +3 -0
  18. transformer.h.0.mlp.c_fc.bias +3 -0
  19. transformer.h.0.mlp.c_fc.g_idx +3 -0
  20. transformer.h.0.mlp.c_fc.qweight +3 -0
  21. transformer.h.0.mlp.c_fc.qzeros +3 -0
  22. transformer.h.0.mlp.c_fc.scales +3 -0
  23. transformer.h.0.mlp.c_proj.bias +3 -0
  24. transformer.h.0.mlp.c_proj.g_idx +3 -0
  25. transformer.h.0.mlp.c_proj.qweight +3 -0
  26. transformer.h.0.mlp.c_proj.qzeros +3 -0
  27. transformer.h.0.mlp.c_proj.scales +3 -0
  28. transformer.h.1.attn.c_attn.bias +3 -0
  29. transformer.h.1.attn.c_attn.g_idx +3 -0
  30. transformer.h.1.attn.c_attn.qweight +3 -0
  31. transformer.h.1.attn.c_attn.qzeros +3 -0
  32. transformer.h.1.attn.c_attn.scales +3 -0
  33. transformer.h.1.attn.c_proj.bias +3 -0
  34. transformer.h.1.attn.c_proj.g_idx +3 -0
  35. transformer.h.1.attn.c_proj.qweight +3 -0
  36. transformer.h.1.attn.c_proj.qzeros +3 -0
  37. transformer.h.1.attn.c_proj.scales +3 -0
  38. transformer.h.1.ln_1.bias +3 -0
  39. transformer.h.1.ln_1.weight +3 -0
  40. transformer.h.1.ln_2.bias +3 -0
  41. transformer.h.1.ln_2.weight +3 -0
  42. transformer.h.1.mlp.c_fc.bias +3 -0
  43. transformer.h.1.mlp.c_fc.g_idx +3 -0
  44. transformer.h.1.mlp.c_fc.qweight +3 -0
  45. transformer.h.1.mlp.c_fc.qzeros +3 -0
  46. transformer.h.1.mlp.c_fc.scales +3 -0
  47. transformer.h.1.mlp.c_proj.bias +3 -0
  48. transformer.h.1.mlp.c_proj.g_idx +3 -0
  49. transformer.h.1.mlp.c_proj.qweight +3 -0
  50. transformer.h.1.mlp.c_proj.qzeros +3 -0
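
Each quantized linear layer is stored as the usual GPTQ quartet: qweight (packed integer weights), qzeros (packed per-group zero points), scales (per-group float scales), and g_idx (the quantization-group index of each input row), plus its unquantized bias; the LayerNorm tensors stay in full precision. Only layers 0 and 1 of the 40 appear here because the view is capped at 50 files. The pointer sizes below are consistent with 8-bit weights at group size 128 (e.g. h.0's c_attn qweight is roughly 6144 × 6400 bytes). Below is a minimal NumPy sketch of dequantizing such a layer, assuming AutoGPTQ-style packing (four 8-bit values per int32, zero points stored offset by one); the helper names are illustrative, not from this repo:

```python
import numpy as np

def unpack_rows(packed):
    """(n // 4, m) int32 -> (n, m): four 8-bit values per word, low byte first."""
    shifts = np.arange(0, 32, 8, dtype=np.uint32)[None, :, None]
    vals = (packed.astype(np.uint32)[:, None, :] >> shifts) & 0xFF
    return vals.reshape(-1, packed.shape[1])

def unpack_cols(packed):
    """(g, m // 4) int32 -> (g, m), same byte order along columns."""
    shifts = np.arange(0, 32, 8, dtype=np.uint32)[None, None, :]
    vals = (packed.astype(np.uint32)[:, :, None] >> shifts) & 0xFF
    return vals.reshape(packed.shape[0], -1)

def dequantize(qweight, qzeros, scales, g_idx):
    q = unpack_rows(qweight).astype(np.float32)  # (in_features, out_features)
    z = unpack_cols(qzeros).astype(np.float32)   # (num_groups, out_features)
    # g_idx maps each input row to its group; many GPTQ checkpoints store
    # zero points offset by one, hence the +1 (an assumption for this repo).
    return scales[g_idx] * (q - (z[g_idx] + 1.0))

# Tiny synthetic shapes: 8 input rows, 4 output columns, one group.
rng = np.random.default_rng(0)
qw = rng.integers(0, 2**31, size=(2, 4), dtype=np.int32)
qz = rng.integers(0, 2**31, size=(1, 1), dtype=np.int32)
sc = np.ones((1, 4), dtype=np.float32)
gi = np.zeros(8, dtype=np.int64)
print(dequantize(qw, qz, sc, gi).shape)  # (8, 4)
```
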
.gitattributes CHANGED
@@ -32,3 +32,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.bias filter=lfs diff=lfs merge=lfs -text
+*.weight filter=lfs diff=lfs merge=lfs -text
+*.g_idx filter=lfs diff=lfs merge=lfs -text
+*.qweight filter=lfs diff=lfs merge=lfs -text
+*.qzeros filter=lfs diff=lfs merge=lfs -text
+*.scales filter=lfs diff=lfs merge=lfs -text
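
These six rules route every tensor shard in the commit through Git LFS, so only small pointer stubs enter git history, while config.json remains a regular text file. A quick sanity check of what the patterns capture, as a minimal Python sketch (fnmatch matches git's attribute behavior for simple suffix patterns like these; the file names are taken from this commit):

```python
import fnmatch

lfs_patterns = ["*.bias", "*.weight", "*.g_idx", "*.qweight", "*.qzeros", "*.scales"]
for name in ["config.json", "lm_head.weight", "transformer.h.0.attn.c_attn.qweight"]:
    in_lfs = any(fnmatch.fnmatch(name, p) for p in lfs_patterns)
    print(f"{name}: {'LFS' if in_lfs else 'regular git'}")
```
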
config.json ADDED
@@ -0,0 +1,39 @@
+{
+  "_name_or_path": "/fsx/bigcode/experiments/pretraining/conversions/6672/large-model",
+  "activation_function": "gelu",
+  "architectures": [
+    "GPTBigCodeForCausalLM"
+  ],
+  "attention_softmax_in_fp32": true,
+  "multi_query": true,
+  "attn_pdrop": 0.1,
+  "bos_token_id": 0,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 0,
+  "inference_runner": 0,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-05,
+  "max_batch_size": null,
+  "max_sequence_length": null,
+  "model_type": "gpt_bigcode",
+  "n_embd": 6144,
+  "n_head": 48,
+  "n_inner": 24576,
+  "n_layer": 40,
+  "n_positions": 8192,
+  "pad_key_length": true,
+  "pre_allocate_kv_cache": false,
+  "resid_pdrop": 0.1,
+  "scale_attention_softmax_in_fp32": true,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.28.1",
+  "use_cache": true,
+  "validate_runner_input": true,
+  "vocab_size": 49152
+}
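
The config describes a 40-layer, 6144-wide GPTBigCode model with multi-query attention and an 8192-token context, matching the StarCoder family (the _name_or_path points into the BigCode pretraining tree). With multi_query enabled, queries keep all 48 heads but K and V share a single head, so the fused c_attn projection is n_embd + 2 * head_dim columns wide rather than 3 * n_embd. A minimal sketch of loading the config with transformers (GPTBigCode support landed in v4.28, the version this config records) and deriving those shapes; the local path is an assumption:

```python
from transformers import GPTBigCodeConfig

# Load the config.json added in this commit (path relative to the checkout).
cfg = GPTBigCodeConfig.from_json_file("config.json")

head_dim = cfg.n_embd // cfg.n_head        # 6144 // 48 = 128
# One shared K/V head under multi-query attention:
c_attn_width = cfg.n_embd + 2 * head_dim   # 6144 + 2 * 128 = 6400
print(cfg.n_layer, head_dim, c_attn_width) # 40 128 6400
```
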
lm_head.weight ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:529afc429e3dda328ffb4cb3c2358a504d5508daf91cd0bebd59d443096afd05
+size 1207960299
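
Every weight file below is checked in as a Git LFS pointer: a three-line text stub giving the spec version, the SHA-256 of the actual blob, and its size in bytes. The lm_head.weight size lines up with an unquantized float32 tensor of vocab_size × n_embd (49152 × 6144 × 4 = 1,207,959,552 bytes, plus a few hundred bytes of serialization overhead). A minimal sketch of parsing such a pointer (the helper name is hypothetical):

```python
def parse_lfs_pointer(text: str) -> dict:
    # Each line of a pointer stub is "key value"; oid carries "sha256:<hash>".
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    return {
        "version": fields["version"],
        "sha256": fields["oid"].split(":", 1)[1],
        "size": int(fields["size"]),
    }

pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:529afc429e3dda328ffb4cb3c2358a504d5508daf91cd0bebd59d443096afd05
size 1207960299
"""
print(parse_lfs_pointer(pointer))  # the lm_head.weight blob: ~1.2 GB
```
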
transformer.h.0.attn.c_attn.bias ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:13eabe7b4cf71f0da24052ded6a5b07855e2b1f2af56e15de6a17d1f6e642936
+size 13671

transformer.h.0.attn.c_attn.g_idx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1af7c9cd16344e180fc7366849484b94f69f2cd08a8fbed547ea882dac66701d
+size 50023

transformer.h.0.attn.c_attn.qweight ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0de72e21cabb0fbccab8bcc51dcf80ab955eb340947f8a311382b1427298f1d1
+size 39322471

transformer.h.0.attn.c_attn.qzeros ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a267bcec6e014d5e28a5923cbd94eb4d8d77d07c4ca16870b426836f455b5d77
+size 308071

transformer.h.0.attn.c_attn.scales ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6e5a104a801756a3fa40c7ca7ceb4bded22af8ca197d32e6d1bce7044271ef97
+size 615271

transformer.h.0.attn.c_proj.bias ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1d1b0f449ad065cb6e725c000d86a7c2f995b7948db16b57c3256dec30f7a89a
+size 13159

transformer.h.0.attn.c_proj.g_idx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:079372e2e2f13672edfe35c8757cca056a50a9957c0804bc1a4ca32c8e113051
+size 50023

transformer.h.0.attn.c_proj.qweight ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b2c4fc57da217dd5f4fb4e5d4c29ba31b78aae8d5860adaf61a8ba3cfbcb7cb1
+size 37749607

transformer.h.0.attn.c_proj.qzeros ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8af984480b240fbe54b99513c315474389b85446f3e4c176ed4546c0dbb75963
+size 295783

transformer.h.0.attn.c_proj.scales ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca2cd4e7aecb00c1e5bb7b27e69160facd5f8b01497d9dfc1e379f65b991f77d
+size 590695

transformer.h.0.ln_1.bias ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c7a16d83afa3a6998832bb0567b0b99d07207d85e681d561e75f96cfef63a023
+size 25362

transformer.h.0.ln_1.weight ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8a4d24d1a3efb0f034385808df30bef44cb41ced551903a2a63b65355d8d52a
+size 25362

transformer.h.0.ln_2.bias ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ebbc623a0e63121db716b484f419c25c9745190093d507b653e7a167c5c94b70
+size 25362

transformer.h.0.ln_2.weight ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:70eab84a7415108ab1ecafec299d6ff952e919f17e457bbe01e15aaae3ae9b75
+size 25362

transformer.h.0.mlp.c_fc.bias ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:68e83dcace83d3be81971f238e2243211171f66eb67ec78d8701a35c88ef884f
+size 50014

transformer.h.0.mlp.c_fc.g_idx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:af0468cb13622c904230de5553b3d7ee455f0203ee2438cc003b247487e5caac
+size 50014

transformer.h.0.mlp.c_fc.qweight ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb27a2082e809dd1abdc619fc3a136553031d290e3a1164a7b530665fe2bfcfd
+size 150995806

transformer.h.0.mlp.c_fc.qzeros ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1f3c47de760d0ed85975058102d7b7a5b888ba54d0a1f04075963e66a682dad1
+size 1180510

transformer.h.0.mlp.c_fc.scales ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d98ba386f72c051e5872ca22684002bfa1b5518e43a9d8da7f32806574f5131a
+size 2360158

transformer.h.0.mlp.c_proj.bias ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c57d49eeb51796afd6e75ded00a57f3138d10f620ce0d6be2b198904d979b66c
+size 13156

transformer.h.0.mlp.c_proj.g_idx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:41be5de59b96d4134288c803ea8033c55f5be912a56d0e0b9e840fafa9a57aed
+size 197476

transformer.h.0.mlp.c_proj.qweight ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f64026e3fde4ec5d9b70d2a450871e380e5122e48d672e97c462412f474659e
+size 150995812

transformer.h.0.mlp.c_proj.qzeros ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e86bf7186cf6cf4e8a19142f3d123ca3f37717f1a2425e4e04283ffdbc4bd239
+size 1180516

transformer.h.0.mlp.c_proj.scales ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0d045454bdd1bbd78c7f8289dfdb726a2fad36f9f455862578cb3350ce49341d
+size 2360164

transformer.h.1.attn.c_attn.bias ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cb834ff1a4144bb99b4afe843880603b6628de90a026920ba42d8a2470aa770f
+size 13671

transformer.h.1.attn.c_attn.g_idx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:15d7e58a6446238f1f6cf6c1b184027aa5b343b561766932f729a5d2778aa10a
+size 50023

transformer.h.1.attn.c_attn.qweight ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a2edc5900ffae022bcbd5602c7ebe05068d0bcb6ce67ba4977da2f474bd824a8
+size 39322471

transformer.h.1.attn.c_attn.qzeros ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0dbc224a6ca69b8ec5e9d5528095e62c4de3ae5b67210a377ab314e0728a334e
+size 308071

transformer.h.1.attn.c_attn.scales ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:49a526f7d9b8ba277a489fb5ca7589d1284531774be404b36895d8d04dcd0f8e
+size 615271

transformer.h.1.attn.c_proj.bias ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6af1a21050f9a9931f4b75ca7a0b1cd79e92224b1751d44d3e0696adb506ff2f
+size 13159

transformer.h.1.attn.c_proj.g_idx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0c12a1fde824d0b3bd9c4569d5583cd4d882a599a48e990fa99fa4b1cefca372
+size 50023

transformer.h.1.attn.c_proj.qweight ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a0da1ed3c5adcc6ce27039f16f42f9be06bbcab00f47f241ae5da90cecd1211
+size 37749607

transformer.h.1.attn.c_proj.qzeros ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:139ae76ff151fcc494298cc6282f6f71e965e118aa817e77c002b997aa53e51b
+size 295783

transformer.h.1.attn.c_proj.scales ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f16086980ee8eb449cfc1ba123b97da80e6899d773e80cdb1f2082a10ecf5942
+size 590695

transformer.h.1.ln_1.bias ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:41c1471c6b5888329982691a654286811dbf545abebb8266ef47040176345fd3
+size 25362

transformer.h.1.ln_1.weight ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:399267253ab67d3cfdbb63348ef04b30794f45378bc5abe1172095ddee2f9284
+size 25362

transformer.h.1.ln_2.bias ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:72c4800401faa63dc88730cdc0f13ea6df28b8a68083ceb505cfd80db371f5e3
+size 25362

transformer.h.1.ln_2.weight ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e816ae0d129c18e3dfe91119ca42d8236dddc5db34604e8fe9d93191f3ec8d8c
+size 25362

transformer.h.1.mlp.c_fc.bias ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fd5b1493901eb6d50c879147b81ed5832f197d227de48bd96fe70844a5e5ea37
+size 50014

transformer.h.1.mlp.c_fc.g_idx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5036c7b6c54afce7d050fb40cfb9f0a741eed7abed1f112dc75618c5eda41906
+size 50014

transformer.h.1.mlp.c_fc.qweight ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f35574047a9792605cf60eaf8059cfdb3a64d7f53ad8ae40c4cd6da623ebbe9
+size 150995806

transformer.h.1.mlp.c_fc.qzeros ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bb05d9621b2acd9fa34b0374285e0ff7c9343b90ea0cc53026edd9fd1cf15e49
+size 1180510

transformer.h.1.mlp.c_fc.scales ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8226e2bdac234e4964e9f1d0628c6c01d099d84d89dd959f5e61e61bdf8e0c62
+size 2360158

transformer.h.1.mlp.c_proj.bias ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1d36be52d8cc71840b280f3c9c5f1415d5acbb528f0b53f30118a508706a2cad
+size 13156

transformer.h.1.mlp.c_proj.g_idx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:42af60c4f287c93531779362d98a74fe62a1f508ddf9494b3778e025f9759d09
+size 197476

transformer.h.1.mlp.c_proj.qweight ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0ca20f0735a39ecb709ec868d086dfa2e0bfb7ac52e244fcc8f940eb2d7c63de
+size 150995812

transformer.h.1.mlp.c_proj.qzeros ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:30e5e6ef0ca0b8e2f07dd7b754c232e69d50958cbdecc04cc20f98d1028b6b14
+size 1180516