reymondzzz committed on
Commit
c0cad07
1 Parent(s): 213cb65
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .gitattributes +6 -0
  2. config.json +39 -0
  3. lm_head.weight +3 -0
  4. transformer.h.0.attn.c_attn.bias +3 -0
  5. transformer.h.0.attn.c_attn.g_idx +3 -0
  6. transformer.h.0.attn.c_attn.qweight +3 -0
  7. transformer.h.0.attn.c_attn.qzeros +3 -0
  8. transformer.h.0.attn.c_attn.scales +3 -0
  9. transformer.h.0.attn.c_proj.bias +3 -0
  10. transformer.h.0.attn.c_proj.g_idx +3 -0
  11. transformer.h.0.attn.c_proj.qweight +3 -0
  12. transformer.h.0.attn.c_proj.qzeros +3 -0
  13. transformer.h.0.attn.c_proj.scales +3 -0
  14. transformer.h.0.ln_1.bias +3 -0
  15. transformer.h.0.ln_1.weight +3 -0
  16. transformer.h.0.ln_2.bias +3 -0
  17. transformer.h.0.ln_2.weight +3 -0
  18. transformer.h.0.mlp.c_fc.bias +3 -0
  19. transformer.h.0.mlp.c_fc.g_idx +3 -0
  20. transformer.h.0.mlp.c_fc.qweight +3 -0
  21. transformer.h.0.mlp.c_fc.qzeros +3 -0
  22. transformer.h.0.mlp.c_fc.scales +3 -0
  23. transformer.h.0.mlp.c_proj.bias +3 -0
  24. transformer.h.0.mlp.c_proj.g_idx +3 -0
  25. transformer.h.0.mlp.c_proj.qweight +3 -0
  26. transformer.h.0.mlp.c_proj.qzeros +3 -0
  27. transformer.h.0.mlp.c_proj.scales +3 -0
  28. transformer.h.1.attn.c_attn.bias +3 -0
  29. transformer.h.1.attn.c_attn.g_idx +3 -0
  30. transformer.h.1.attn.c_attn.qweight +3 -0
  31. transformer.h.1.attn.c_attn.qzeros +3 -0
  32. transformer.h.1.attn.c_attn.scales +3 -0
  33. transformer.h.1.attn.c_proj.bias +3 -0
  34. transformer.h.1.attn.c_proj.g_idx +3 -0
  35. transformer.h.1.attn.c_proj.qweight +3 -0
  36. transformer.h.1.attn.c_proj.qzeros +3 -0
  37. transformer.h.1.attn.c_proj.scales +3 -0
  38. transformer.h.1.ln_1.bias +3 -0
  39. transformer.h.1.ln_1.weight +3 -0
  40. transformer.h.1.ln_2.bias +3 -0
  41. transformer.h.1.ln_2.weight +3 -0
  42. transformer.h.1.mlp.c_fc.bias +3 -0
  43. transformer.h.1.mlp.c_fc.g_idx +3 -0
  44. transformer.h.1.mlp.c_fc.qweight +3 -0
  45. transformer.h.1.mlp.c_fc.qzeros +3 -0
  46. transformer.h.1.mlp.c_fc.scales +3 -0
  47. transformer.h.1.mlp.c_proj.bias +3 -0
  48. transformer.h.1.mlp.c_proj.g_idx +3 -0
  49. transformer.h.1.mlp.c_proj.qweight +3 -0
  50. transformer.h.1.mlp.c_proj.qzeros +3 -0
.gitattributes CHANGED
@@ -32,3 +32,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ *.bias filter=lfs diff=lfs merge=lfs -text
36
+ *.weight filter=lfs diff=lfs merge=lfs -text
37
+ *.g_idx filter=lfs diff=lfs merge=lfs -text
38
+ *.qweight filter=lfs diff=lfs merge=lfs -text
39
+ *.qzeros filter=lfs diff=lfs merge=lfs -text
40
+ *.scales filter=lfs diff=lfs merge=lfs -text
config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/fsx/bigcode/experiments/pretraining/conversions/6672/large-model",
3
+ "activation_function": "gelu",
4
+ "architectures": [
5
+ "GPTBigCodeForCausalLM"
6
+ ],
7
+ "attention_softmax_in_fp32": true,
8
+ "multi_query": true,
9
+ "attn_pdrop": 0.1,
10
+ "bos_token_id": 0,
11
+ "embd_pdrop": 0.1,
12
+ "eos_token_id": 0,
13
+ "inference_runner": 0,
14
+ "initializer_range": 0.02,
15
+ "layer_norm_epsilon": 1e-05,
16
+ "max_batch_size": null,
17
+ "max_sequence_length": null,
18
+ "model_type": "gpt_bigcode",
19
+ "n_embd": 6144,
20
+ "n_head": 48,
21
+ "n_inner": 24576,
22
+ "n_layer": 40,
23
+ "n_positions": 8192,
24
+ "pad_key_length": true,
25
+ "pre_allocate_kv_cache": false,
26
+ "resid_pdrop": 0.1,
27
+ "scale_attention_softmax_in_fp32": true,
28
+ "scale_attn_weights": true,
29
+ "summary_activation": null,
30
+ "summary_first_dropout": 0.1,
31
+ "summary_proj_to_labels": true,
32
+ "summary_type": "cls_index",
33
+ "summary_use_proj": true,
34
+ "torch_dtype": "float32",
35
+ "transformers_version": "4.28.1",
36
+ "use_cache": true,
37
+ "validate_runner_input": true,
38
+ "vocab_size": 49152
39
+ }
lm_head.weight ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:529afc429e3dda328ffb4cb3c2358a504d5508daf91cd0bebd59d443096afd05
3
+ size 1207960299
transformer.h.0.attn.c_attn.bias ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13eabe7b4cf71f0da24052ded6a5b07855e2b1f2af56e15de6a17d1f6e642936
3
+ size 13671
transformer.h.0.attn.c_attn.g_idx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12d5e0b0438fdaa07e9f96b001ab10db4048c165859a8b137d508dae33f15b3d
3
+ size 50023
transformer.h.0.attn.c_attn.qweight ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf87f27fee7e2512170fa90e30e195fcc6d76c3442b686fd4f78e4e912af1071
3
+ size 19661671
transformer.h.0.attn.c_attn.qzeros ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f04f557e8c0e8b468e8495d801ec123c89804a9029da580e7baf4f00237b296
3
+ size 154471
transformer.h.0.attn.c_attn.scales ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:914b3936aa9e926a6eedbf23ad7e5234b3ce22b596cfe4153792604e3e53720f
3
+ size 615271
transformer.h.0.attn.c_proj.bias ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d1b0f449ad065cb6e725c000d86a7c2f995b7948db16b57c3256dec30f7a89a
3
+ size 13159
transformer.h.0.attn.c_proj.g_idx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e43fc5060b3ba006fcb1ddc34f30fa699320efa737d5837b9aa47d8e0d44e24
3
+ size 50023
transformer.h.0.attn.c_proj.qweight ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc5d64b6bcfae69f748cc767d19b880c97adc12946b33b5a1aa7d6fbfe0561fd
3
+ size 18875239
transformer.h.0.attn.c_proj.qzeros ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de5829b87f51f0a009a81fa0363c275473859499df0b16d6c23b163fa8c13098
3
+ size 148327
transformer.h.0.attn.c_proj.scales ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc37708eb1acfc23bab3c1ece2ac42a7f1226bf316190a135ada4f0b292a32f6
3
+ size 590695
transformer.h.0.ln_1.bias ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7a16d83afa3a6998832bb0567b0b99d07207d85e681d561e75f96cfef63a023
3
+ size 25362
transformer.h.0.ln_1.weight ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8a4d24d1a3efb0f034385808df30bef44cb41ced551903a2a63b65355d8d52a
3
+ size 25362
transformer.h.0.ln_2.bias ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebbc623a0e63121db716b484f419c25c9745190093d507b653e7a167c5c94b70
3
+ size 25362
transformer.h.0.ln_2.weight ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70eab84a7415108ab1ecafec299d6ff952e919f17e457bbe01e15aaae3ae9b75
3
+ size 25362
transformer.h.0.mlp.c_fc.bias ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68e83dcace83d3be81971f238e2243211171f66eb67ec78d8701a35c88ef884f
3
+ size 50014
transformer.h.0.mlp.c_fc.g_idx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbc9107c4311f345e1dc4879d498b83040872a716f3aacb7b9439d1ef6a8d48e
3
+ size 50014
transformer.h.0.mlp.c_fc.qweight ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e95cf206352d56ddbe27701f0fd8c9b28c05e00173b06ed57ab2c0632d2eccce
3
+ size 75498334
transformer.h.0.mlp.c_fc.qzeros ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:519a36039aa7235b47ae1046a23daccfe52664611721a62a19dd3db0bf51fba3
3
+ size 590686
transformer.h.0.mlp.c_fc.scales ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e69b3efd91847c9d3ea03c4536873403a10efa1914749fe50548b3984d06865d
3
+ size 2360158
transformer.h.0.mlp.c_proj.bias ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c57d49eeb51796afd6e75ded00a57f3138d10f620ce0d6be2b198904d979b66c
3
+ size 13156
transformer.h.0.mlp.c_proj.g_idx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0878060b7d566b2f8beda3baa5d5dd9ad284efd6f9052867552094fb8db593a
3
+ size 197476
transformer.h.0.mlp.c_proj.qweight ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1dea9f8788b52e59110da29b3a97020b2031a3badb52df33ea0be3183846ce45
3
+ size 75498340
transformer.h.0.mlp.c_proj.qzeros ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a10fd54273f9c8711503625f6056282bd0b743dab033664ad75f174b549bf814
3
+ size 590692
transformer.h.0.mlp.c_proj.scales ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4889ca3ec0464abd122002625d20efb8eea9d4e3d4c99e5b1912531d214a558
3
+ size 2360164
transformer.h.1.attn.c_attn.bias ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb834ff1a4144bb99b4afe843880603b6628de90a026920ba42d8a2470aa770f
3
+ size 13671
transformer.h.1.attn.c_attn.g_idx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:837652f8d707670d4992cc0503d19b80aeccba81baed8afca37e77fecd1ea1d8
3
+ size 50023
transformer.h.1.attn.c_attn.qweight ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03df379e36328b3e76d93cb812889fe3d91d069a23c3a9c79ac068fd9c12777f
3
+ size 19661671
transformer.h.1.attn.c_attn.qzeros ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd3696affbaf51408a5c5be2825d88c2824c003b615642973d2f2e04e7503b15
3
+ size 154471
transformer.h.1.attn.c_attn.scales ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4716c9e83c50adfc6fd8b6632da767fdff2185b4556d1ced0eb376d3fd72521
3
+ size 615271
transformer.h.1.attn.c_proj.bias ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6af1a21050f9a9931f4b75ca7a0b1cd79e92224b1751d44d3e0696adb506ff2f
3
+ size 13159
transformer.h.1.attn.c_proj.g_idx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a0067073f9a3a818b96d4cb02c517a0ea3393d644ebd547a7b21684dc7914ac
3
+ size 50023
transformer.h.1.attn.c_proj.qweight ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20030d760f16f805998fb07eaf0532d829f4ae75604cefa6d3b830f939c80664
3
+ size 18875239
transformer.h.1.attn.c_proj.qzeros ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74591b5df3935ebb4be236638e7c86d11218369319ac80c6719f9f07161dc665
3
+ size 148327
transformer.h.1.attn.c_proj.scales ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3abcd0fd2037f9151058a00f2f092d20cae8bfc3bc229a02c61135e1fbdbab1
3
+ size 590695
transformer.h.1.ln_1.bias ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41c1471c6b5888329982691a654286811dbf545abebb8266ef47040176345fd3
3
+ size 25362
transformer.h.1.ln_1.weight ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:399267253ab67d3cfdbb63348ef04b30794f45378bc5abe1172095ddee2f9284
3
+ size 25362
transformer.h.1.ln_2.bias ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72c4800401faa63dc88730cdc0f13ea6df28b8a68083ceb505cfd80db371f5e3
3
+ size 25362
transformer.h.1.ln_2.weight ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e816ae0d129c18e3dfe91119ca42d8236dddc5db34604e8fe9d93191f3ec8d8c
3
+ size 25362
transformer.h.1.mlp.c_fc.bias ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd5b1493901eb6d50c879147b81ed5832f197d227de48bd96fe70844a5e5ea37
3
+ size 50014
transformer.h.1.mlp.c_fc.g_idx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f12f4552a088630abe51817685aafc7f84f0e7fa7cb921968ac14edb088a6b15
3
+ size 50014
transformer.h.1.mlp.c_fc.qweight ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4f3fa4d3a7b4ad40b28c1fe63791aecc769c51d9d14e7b04ad10638afe3b1c8
3
+ size 75498334
transformer.h.1.mlp.c_fc.qzeros ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1b40496001d16e63198067080be7aaf431816abf058243ae2ba5563c33deea3
3
+ size 590686
transformer.h.1.mlp.c_fc.scales ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c268a5b076970ba3c8f4e6610b983a018c42a6e14364bc812f6693f8b25d233
3
+ size 2360158
transformer.h.1.mlp.c_proj.bias ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d36be52d8cc71840b280f3c9c5f1415d5acbb528f0b53f30118a508706a2cad
3
+ size 13156
transformer.h.1.mlp.c_proj.g_idx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3e6284f8729de3e706f54f08e1e35075099e5632ce28af9d1a9f46e882113c7
3
+ size 197476
transformer.h.1.mlp.c_proj.qweight ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75d980bb938c09d043b257883b0df6c14286b9e1dceecc72c4aa8b5893670896
3
+ size 75498340
transformer.h.1.mlp.c_proj.qzeros ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1bbcf7a439f06f4a9126a841ad2b7a67779f07af4770801e62d8ce87cbbe2cb
3
+ size 590692