tomeras1 committed on
Commit
76233f1
1 Parent(s): 61c8679

Move to in-library checkpoint

Browse files
config.json CHANGED
@@ -12,7 +12,6 @@
12
  "AutoModelForSequenceClassification": "model.JambaForSequenceClassification"
13
  },
14
  "bos_token_id": 1,
15
- "calc_logits_for_entire_prompt": false,
16
  "eos_token_id": 2,
17
  "expert_layer_offset": 1,
18
  "expert_layer_period": 2,
@@ -25,15 +24,15 @@
25
  "mamba_d_state": 16,
26
  "mamba_dt_rank": 256,
27
  "mamba_expand": 2,
28
- "mamba_inner_layernorms": true,
29
  "mamba_proj_bias": false,
 
30
  "model_type": "jamba",
31
- "n_ctx": 262144,
32
  "num_attention_heads": 32,
33
  "num_experts": 16,
34
  "num_experts_per_tok": 2,
35
  "num_hidden_layers": 32,
36
  "num_key_value_heads": 8,
 
37
  "output_router_logits": false,
38
  "pad_token_id": 0,
39
  "rms_norm_eps": 1e-06,
@@ -41,7 +40,7 @@
41
  "sliding_window": null,
42
  "tie_word_embeddings": false,
43
  "torch_dtype": "bfloat16",
44
- "transformers_version": "4.40.0.dev0",
45
  "use_cache": true,
46
  "use_mamba_kernels": true,
47
  "vocab_size": 65536
 
12
  "AutoModelForSequenceClassification": "model.JambaForSequenceClassification"
13
  },
14
  "bos_token_id": 1,
 
15
  "eos_token_id": 2,
16
  "expert_layer_offset": 1,
17
  "expert_layer_period": 2,
 
24
  "mamba_d_state": 16,
25
  "mamba_dt_rank": 256,
26
  "mamba_expand": 2,
 
27
  "mamba_proj_bias": false,
28
+ "max_position_embeddings": 262144,
29
  "model_type": "jamba",
 
30
  "num_attention_heads": 32,
31
  "num_experts": 16,
32
  "num_experts_per_tok": 2,
33
  "num_hidden_layers": 32,
34
  "num_key_value_heads": 8,
35
+ "num_logits_to_keep": 1,
36
  "output_router_logits": false,
37
  "pad_token_id": 0,
38
  "rms_norm_eps": 1e-06,
 
40
  "sliding_window": null,
41
  "tie_word_embeddings": false,
42
  "torch_dtype": "bfloat16",
43
+ "transformers_version": "4.40.1",
44
  "use_cache": true,
45
  "use_mamba_kernels": true,
46
  "vocab_size": 65536
generation_config.json CHANGED
@@ -3,5 +3,5 @@
3
  "bos_token_id": 1,
4
  "eos_token_id": 2,
5
  "pad_token_id": 0,
6
- "transformers_version": "4.40.0.dev0"
7
  }
 
3
  "bos_token_id": 1,
4
  "eos_token_id": 2,
5
  "pad_token_id": 0,
6
+ "transformers_version": "4.40.1"
7
  }
metadata-1714912566.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "args": {
3
+ "legacy_model_path": "gs://ai21-algo-studio-research/tomer/published_Jamba-v0.1_new_format/legacy_jamba-v0.1",
4
+ "model_output_path": "gs://ai21-algo-studio-research/tomer/published_Jamba-v0.1_new_format/script_from_legacy_fmt"
5
+ },
6
+ "datetime": "2024-05-05 12:36:06",
7
+ "git_branch": "official-hf-format",
8
+ "git_short_sha": "0693ce8",
9
+ "script": "/app/scripts/convert_legacy_hf_to_official_hf.py",
10
+ "timestamp": 1714912566
11
+ }
model-00001-of-00021.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce46bbcbda10cfac6b5855da022777a0387e2b729cbdd219081fa3f69cb214a2
3
- size 4951236864
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1aace34ee0da3bf95605bd150fff6d3e78110be4048a3c389b0a740354b2ccb7
3
+ size 4951761424
model-00002-of-00021.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ae0b82247f3164270f151fa12ea1ceb63992e8827c739319fe20342eadafa8a
3
- size 4884145024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ba1de67a86329431f14f7ffa165d84055d32ce57a6d2314e3b2464eac3732dc
3
+ size 4884669624
model-00003-of-00021.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d2d44419116a65b3617fa35d20b69e2060449b53c0ac36192a3ec4b0a60b0a8d
3
- size 4992294632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1abc4f16865fb78241c9453292ee3b2ca2c1e2d54ee945631da625834b95c9b2
3
+ size 4992557120
model-00004-of-00021.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:70fe04d7dc1124871ca1f6071504ba019174db27cd57c625938e6383ebee5fee
3
- size 4958591040
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45fab97739a58e924791572ea3d06f9c90b9ff2a299460aaa4bd87c6e9d424f3
3
+ size 4958853560
model-00005-of-00021.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:402079425e45a01c256a080cae3ab39be3f3cfae56dba7c815a44f0c58b3a442
3
- size 4975501296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4b0ec6e8f33e6d7b1f837cd4c25818487dcc7e478734606da28110507e51c97
3
+ size 4975763832
model-00006-of-00021.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2cc9971c058d95a8f13966a3aa82294564381937902634c0c064be68104821ae
3
- size 4884145016
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed98d5c3c8d7ab7352944bea09b0d54d98066cf567ba3d069da12c05575d56ed
3
+ size 4884669616
model-00007-of-00021.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d9ba83c87790cdb6fb9f7861a712f315469edbf065ab64bdaa35cc99b4ec8746
3
- size 4884144968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:735be2bc568711bf42a4caebcda8288dd300b31b48fa098b00df3cf1a98e10e2
3
+ size 4884669640
model-00008-of-00021.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b45331970c155ca74f509576cb050d006997bef08a99189cf047aa1a3a4b254e
3
- size 4992294696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0c8d817b2b47661d361e8b520128b3194185f756cc2204a95d642e24895ee51
3
+ size 4992557176
model-00009-of-00021.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe5a1e58d58598a64a59a3ca87c170a171a7bba2102138c71047d5b5458cdebf
3
- size 4932506800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e50222cf865ca5678d22574b131294303c46b249478cf70113c701f70331e999
3
+ size 4932507176
model-00010-of-00021.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd37916e35b2b3b98e7a9bb790a779ac51ad0bbcff92428c0ed11c8839379205
3
- size 4884145056
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1b4b69b24ae55827b6c8b1e4a10807aa3525bc85f4d34dc002ac7440757fbf4
3
+ size 4884669672
model-00011-of-00021.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:392435bc85f4c90bf129c30260da8c820f35bca91610aa0e682cb915f1d855c6
3
- size 4884145088
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60213cac13b92ed34b93ce48e670434f22e3bf8b2b8df20c60b7bf8a9515c35c
3
+ size 4884669696
model-00012-of-00021.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b5141158f7a755a7e0f60c73f4c25ba02c2bfdab548944f8d4146f41391c621a
3
- size 4884145088
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05805eacd3bb40cc9da802350409f1cb078e8b276da7e06c7a8a5ca5b26cc887
3
+ size 4884669688
model-00013-of-00021.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:14ce5aabc4a17e54e40b30fba322104dd19bad512bab6e554fa56bafe4433da7
3
- size 4932506800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:201df979a1b34ced6cdbb7a790163412636779f1119e3845a704c489181d03d2
3
+ size 4932507176
model-00014-of-00021.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd79a548e39ee02f6a9b553f93f6652783c9dbc895ab685848d9e1655903965f
3
- size 4992294648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0a7eb42a9ea3a385442c2e758dd5efd5dc5b913f1d10bfd37792cc963a33c93
3
+ size 4992557152
model-00015-of-00021.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb5af1275e6a0c5bc2c195e1802a64cee6aa92e3a11fcff5acd8b7bbf720ef75
3
- size 4884145088
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4b9afe4398000c28b36e3aa40c87086af673d4f8a64bfc5767941ab2008bcc9
3
+ size 4884669688
model-00016-of-00021.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e9efc22a654010417091851b00277db7116e8c532ae5410cacc13bfa49b99c06
3
- size 4884145088
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd1ac6cc861971c43bdf0c9c6d4c9fe72d33e5227e054a621e2e68f001419763
3
+ size 4884669688
model-00017-of-00021.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec7ab387e62b0c65a3567cc4d17d13166b577cf89ff59a8d5d7b248fdbbc68da
3
- size 4908260352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52d9eea696dd29ef413d617bbcb62a9f159e8fe8170d36e018932cef45ee281d
3
+ size 4908522856
model-00018-of-00021.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f65a30ff1d8e1fc086460839056e7bc7a6a2ef81f0df35dc1a752bf951f92df
3
- size 4908391496
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77acada7c098e81280645ea0a9dbfa00196dca6da8946498b9907e9e376fb42d
3
+ size 4908654000
model-00019-of-00021.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7975019ffa4bb6f502e3406a53ef61ee08085330502ba32fb3e9883b7033c8c7
3
- size 4992294688
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09e10dfd6c6459cd3460b1d667639717d3657274c1694c19a6fdbac1be6a76bf
3
+ size 4992557168
model-00020-of-00021.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6363d3d6f89d09a971af839cd923a206a06e73d090ae74a605ed27e97fab93cf
3
- size 4884145088
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2bd5c27b2cca6e06f7b4497ce8c9b1522a64846817a871bad274d08507960ed0
3
+ size 4884669696
model-00021-of-00021.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a21e65470d7dbe4ae849be427eb5366cc7cc311138cc7f943f3d71d84b7c7ffd
3
- size 4647318256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a47ef23db8deb5364da676a40dc3dcb011fb9d9ceef13ba044c176e9a83ac1e3
3
+ size 4647318576
model.safetensors.index.json CHANGED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json CHANGED
@@ -1,6 +1,30 @@
1
  {
2
- "bos_token": "<|startoftext|>",
3
- "eos_token": "<|endoftext|>",
4
- "pad_token": "<|pad|>",
5
- "unk_token": "<|unk|>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  }
 
1
  {
2
+ "bos_token": {
3
+ "content": "<|startoftext|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<|pad|>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<|unk|>",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
  }