Panchovix committed on
Commit
36d437b
1 Parent(s): 21dbbf2

Upload 14 files

Browse files
config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "MistralForCausalLM"
4
+ ],
5
+ "attention_dropout": 0.0,
6
+ "bos_token_id": 1,
7
+ "eos_token_id": 2,
8
+ "hidden_act": "silu",
9
+ "hidden_size": 12288,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 28672,
12
+ "max_position_embeddings": 32768,
13
+ "model_type": "mistral",
14
+ "num_attention_heads": 96,
15
+ "num_hidden_layers": 88,
16
+ "num_key_value_heads": 8,
17
+ "rms_norm_eps": 1e-05,
18
+ "rope_theta": 1000000.0,
19
+ "sliding_window": null,
20
+ "tie_word_embeddings": false,
21
+ "torch_dtype": "bfloat16",
22
+ "transformers_version": "4.42.3",
23
+ "use_cache": true,
24
+ "vocab_size": 32768
25
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 2,
5
+ "transformers_version": "4.42.3"
6
+ }
output-00001-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce856eacee738cbdbd3fe514245a54ae3788c7161a435dadbdfa0095d6df4b3f
3
+ size 8583689644
output-00002-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33cafb63f8f04b9a173706ed184f0dd56a9db76d2260b63f646b7064c2f48e7c
3
+ size 8504228506
output-00003-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56769a07ec18c87451ebbecf70aba1eb0f6008024ae2fb5f0f6ffb9ef4b1618c
3
+ size 8588679832
output-00004-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33db0d1b6d2ae8954c5ff076b0bacdddc385bf495aa730b07c419d0472bd0246
3
+ size 8462536444
output-00005-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8abef56b803d3d2bc4d885383883fa66eda1fe3ce96428f08eddf70d6e22f4aa
3
+ size 8488630568
output-00006-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4c579b5c69c29dc584c11f930b23aff610873751694b5d0c828175e459e3197
3
+ size 8571493588
output-00007-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c42159d8f76eb273881ad205b55bcaaa9ac36b3bb8033a5606c1ca73b78ba0f
3
+ size 7021565176
params.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dim": 12288,
3
+ "n_layers": 88,
4
+ "head_dim": 128,
5
+ "hidden_dim": 28672,
6
+ "n_heads": 96,
7
+ "n_kv_heads": 8,
8
+ "norm_eps": 1e-05,
9
+ "vocab_size": 32768,
10
+ "rope_theta": 1000000.0
11
+ }
test.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import json
import os
from typing import Dict

import torch
from huggingface_hub import split_torch_state_dict_into_shards
from safetensors.torch import load_file, save_file


def save_state_dict(state_dict: Dict[str, torch.Tensor], save_directory: str) -> None:
    """Re-shard a flat state dict and write it as safetensors files.

    Splits ``state_dict`` into shards named ``consolidated{suffix}.safetensors``
    (huggingface_hub chooses shard boundaries by size) and saves each shard
    into ``save_directory``. If more than one shard is produced, also writes a
    ``consolidated.safetensors.index.json`` mapping each tensor name to the
    shard file that contains it.

    Args:
        state_dict: Mapping of tensor name -> tensor to be saved.
        save_directory: Existing directory to write shard files (and the
            optional index) into.
    """
    state_dict_split = split_torch_state_dict_into_shards(
        state_dict, filename_pattern='consolidated{suffix}.safetensors'
    )
    for filename, tensor_names in state_dict_split.filename_to_tensors.items():
        # Materialize just this shard's tensors; names come from the split plan.
        shard = {name: state_dict[name] for name in tensor_names}
        print("Saving", save_directory, filename)
        save_file(shard, os.path.join(save_directory, filename))
    if state_dict_split.is_sharded:
        # Index file lets loaders locate each tensor without opening every shard.
        index = {
            "metadata": state_dict_split.metadata,
            "weight_map": state_dict_split.tensor_to_filename,
        }
        index_path = os.path.join(save_directory, "consolidated.safetensors.index.json")
        with open(index_path, "w") as f:
            f.write(json.dumps(index, indent=2))


if __name__ == "__main__":
    # Load the single monolithic checkpoint and re-shard it in place
    # (current working directory).
    big_file = 'consolidated.safetensors'
    loaded = load_file(big_file)
    save_state_dict(loaded, save_directory='.')
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model.v3 ADDED
Binary file (588 kB). View file
 
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff