Muennighoff committed on
Commit 424904b
1 Parent(s): 496ff8d
.gitattributes CHANGED
@@ -29,3 +29,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
1_Pooling/config.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "word_embedding_dimension": 4096,
+   "pooling_mode_cls_token": false,
+   "pooling_mode_mean_tokens": false,
+   "pooling_mode_max_tokens": false,
+   "pooling_mode_mean_sqrt_len_tokens": false,
+   "pooling_mode_weightedmean_tokens": true,
+   "pooling_mode_lasttoken": false
+ }
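The key flag above is `pooling_mode_weightedmean_tokens`: token embeddings are averaged with position-proportional weights rather than uniformly, as in SGPT. A minimal sketch of the idea (not the library's exact implementation), assuming hidden states of shape `(batch, seq_len, 4096)`:

```python
# Sketch of position-weighted mean pooling (not the library's exact code):
# token i is weighted by (i + 1), so later tokens contribute more, and padded
# positions are zeroed out via the attention mask.
import torch

def weighted_mean_pooling(token_embeddings: torch.Tensor,
                          attention_mask: torch.Tensor) -> torch.Tensor:
    seq_len = token_embeddings.shape[1]
    weights = torch.arange(1, seq_len + 1, device=token_embeddings.device)
    weights = weights.view(1, seq_len, 1) * attention_mask.unsqueeze(-1)  # (batch, seq, 1)
    summed = (token_embeddings * weights).sum(dim=1)                      # (batch, dim)
    return summed / weights.sum(dim=1)                                    # divide by total weight

emb = weighted_mean_pooling(torch.randn(2, 5, 4096), torch.ones(2, 5))
print(emb.shape)  # torch.Size([2, 4096])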
README.md ADDED
@@ -0,0 +1,86 @@
+ ---
+ pipeline_tag: sentence-similarity
+ tags:
+ - sentence-transformers
+ - feature-extraction
+ - sentence-similarity
+ ---
+
+ # {MODEL_NAME}
+
+ This is a [sentence-transformers](https://www.SBERT.net) model: it maps sentences & paragraphs to a 4096-dimensional dense vector space and can be used for tasks like clustering or semantic search.
+
+ <!--- Describe your model here -->
+
+ ## Usage (Sentence-Transformers)
+
+ Using this model is easy once you have [sentence-transformers](https://www.SBERT.net) installed:
+
+ ```
+ pip install -U sentence-transformers
+ ```
+
+ Then you can use the model like this:
+
+ ```python
+ from sentence_transformers import SentenceTransformer
+ sentences = ["This is an example sentence", "Each sentence is converted"]
+
+ model = SentenceTransformer('{MODEL_NAME}')
+ embeddings = model.encode(sentences)
+ print(embeddings)
+ ```
+
+ ## Evaluation Results
+
+ <!--- Describe how your model was evaluated -->
+
+ For an automated evaluation of this model, see the *Sentence Embeddings Benchmark*: [https://seb.sbert.net](https://seb.sbert.net?model_name={MODEL_NAME})
+
+ ## Training
+ The model was trained with the parameters:
+
+ **DataLoader**:
+
+ `torch.utils.data.dataloader.DataLoader` of length 15600 with parameters:
+ ```
+ {'batch_size': 32, 'sampler': 'torch.utils.data.sampler.RandomSampler', 'batch_sampler': 'torch.utils.data.sampler.BatchSampler'}
+ ```
+
+ **Loss**:
+
+ `sentence_transformers.losses.MultipleNegativesRankingLoss.MNRLGradCache`
+
+ Parameters of the fit() method:
+ ```
+ {
+     "epochs": 10,
+     "evaluation_steps": 0,
+     "evaluator": "NoneType",
+     "max_grad_norm": 1,
+     "optimizer_class": "<class 'transformers.optimization.AdamW'>",
+     "optimizer_params": {
+         "lr": 0.0004
+     },
+     "scheduler": "WarmupLinear",
+     "steps_per_epoch": null,
+     "warmup_steps": 1000,
+     "weight_decay": 0.01
+ }
+ ```
+
+ ## Full Model Architecture
+ ```
+ SentenceTransformer(
+   (0): Transformer({'max_seq_length': 300, 'do_lower_case': False}) with Transformer model: BloomModel
+   (1): Pooling({'word_embedding_dimension': 4096, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': True, 'pooling_mode_lasttoken': False})
+ )
+ ```
+
+ ## Citing & Authors
+
+ <!--- Describe where people can find more information -->
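The README's snippet prints raw embeddings; a typical next step is to score them with cosine similarity. A sketch building on that snippet (`{MODEL_NAME}` remains the README's placeholder for this repo's id):

```python
# Sketch: scoring the embeddings from the README snippet with cosine similarity
# for a small semantic-search query. '{MODEL_NAME}' is the README's placeholder.
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer('{MODEL_NAME}')
docs = ["BLOOM is a multilingual language model",
        "Paris is the capital of France"]
query = "What is the capital of France?"

doc_emb = model.encode(docs, convert_to_tensor=True)
query_emb = model.encode(query, convert_to_tensor=True)

scores = util.cos_sim(query_emb, doc_emb)[0]  # one cosine score per document
best = int(scores.argmax())
print(docs[best], float(scores[best]))
```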
config.json ADDED
@@ -0,0 +1,33 @@
+ {
+   "_name_or_path": "/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3/bloom-7b1",
+   "apply_residual_connection_post_layernorm": false,
+   "architectures": [
+     "BloomModel"
+   ],
+   "attention_dropout": 0.0,
+   "attention_softmax_in_fp32": true,
+   "bias_dropout_fusion": true,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "hidden_dropout": 0.0,
+   "initializer_range": 0.02,
+   "layer_norm_epsilon": 1e-05,
+   "masked_softmax_fusion": true,
+   "model_type": "bloom",
+   "n_embed": 4096,
+   "n_inner": null,
+   "n_layer": 30,
+   "num_attention_heads": 32,
+   "offset_alibi": 100,
+   "pad_token_id": 3,
+   "pretraining_tp": 4,
+   "seq_length": 2048,
+   "skip_bias_add": true,
+   "skip_bias_add_qkv": false,
+   "slow_but_exact": false,
+   "torch_dtype": "float32",
+   "transformers_version": "4.20.1",
+   "unk_token_id": 0,
+   "use_cache": true,
+   "vocab_size": 250682
+ }
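This is the BLOOM-7B1 backbone config: 30 layers, 32 heads, hidden size 4096 (stored under the legacy `n_embed` key), and a 250,682-entry vocabulary. A hedged sanity check, assuming a recent transformers release that maps `n_embed` onto `hidden_size` and with `{MODEL_NAME}` again the README's placeholder:

```python
# Sketch: load this config with transformers and sanity-check it against the
# pooling module. Recent transformers releases map the legacy "n_embed" key
# onto hidden_size when loading BLOOM configs.
from transformers import AutoConfig

config = AutoConfig.from_pretrained('{MODEL_NAME}')
assert config.model_type == "bloom"
assert config.hidden_size == 4096  # must match word_embedding_dimension in 1_Pooling/config.json
print(config.n_layer, config.num_attention_heads, config.vocab_size)  # 30 32 250682
```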
config_sentence_transformers.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "__version__": {
+     "sentence_transformers": "2.1.0",
+     "transformers": "4.20.1",
+     "pytorch": "1.12.0"
+   }
+ }
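These are the library versions recorded at save time. A small sketch comparing them against the local environment, using only the standard library (`importlib.metadata`, Python 3.8+):

```python
# Sketch: compare the versions recorded at save time with the local install.
import json
from importlib.metadata import version

with open("config_sentence_transformers.json") as f:
    saved = json.load(f)["__version__"]

for key, dist in [("sentence_transformers", "sentence-transformers"),
                  ("transformers", "transformers")]:
    print(f"{key}: saved {saved[key]}, installed {version(dist)}")
```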
evaluation/beir.json ADDED
@@ -0,0 +1 @@
+ {"ndcgs": {"sgpt-bloom-7b1-msmarco": {"scifact": {"NDCG@1": 0.59, "NDCG@3": 0.66868, "NDCG@5": 0.69178, "NDCG@10": 0.71824, "NDCG@100": 0.74152, "NDCG@1000": 0.74616}, "nfcorpus": {"NDCG@1": 0.45975, "NDCG@3": 0.41542, "NDCG@5": 0.39154, "NDCG@10": 0.35748, "NDCG@100": 0.32859, "NDCG@1000": 0.41791}, "arguana": {"NDCG@1": 0.23542, "NDCG@3": 0.36629, "NDCG@5": 0.41642, "NDCG@10": 0.47281, "NDCG@100": 0.52192, "NDCG@1000": 0.52529}, "cqadupstack_webmasters": {"NDCG@1": 0.27668, "NDCG@3": 0.32185, "NDCG@5": 0.33953, "NDCG@10": 0.36713, "NDCG@100": 0.42275, "NDCG@1000": 0.45387}, "cqadupstack_android": {"NDCG@1": 0.33476, "NDCG@3": 0.3679, "NDCG@5": 0.39528, "NDCG@10": 0.42525, "NDCG@100": 0.47769, "NDCG@1000": 0.50024}, "cqadupstack_mathematica": {"NDCG@1": 0.15796, "NDCG@3": 0.20281, "NDCG@5": 0.22875, "NDCG@10": 0.25536, "NDCG@100": 0.31178, "NDCG@1000": 0.34362}, "cqadupstack_english": {"NDCG@1": 0.37325, "NDCG@3": 0.40509, "NDCG@5": 0.42261, "NDCG@10": 0.44531, "NDCG@100": 0.48826, "NDCG@1000": 0.50904}, "cqadupstack_gaming": {"NDCG@1": 0.40313, "NDCG@3": 0.46762, "NDCG@5": 0.4959, "NDCG@10": 0.52259, "NDCG@100": 0.56564, "NDCG@1000": 0.5797}, "scidocs": {"NDCG@1": 0.208, "NDCG@3": 0.17601, "NDCG@5": 0.15464, "NDCG@10": 0.18435, "NDCG@100": 0.25829, "NDCG@1000": 0.30881}, "cqadupstack_programmers": {"NDCG@1": 0.2968, "NDCG@3": 0.33463, "NDCG@5": 0.35794, "NDCG@10": 0.38225, "NDCG@100": 0.44025, "NDCG@1000": 0.46704}, "cqadupstack_physics": {"NDCG@1": 0.31954, "NDCG@3": 0.36422, "NDCG@5": 0.39347, "NDCG@10": 0.41971, "NDCG@100": 0.47164, "NDCG@1000": 0.49612}, "cqadupstack_gis": {"NDCG@1": 0.25424, "NDCG@3": 0.31326, "NDCG@5": 0.33256, "NDCG@10": 0.35723, "NDCG@100": 0.41311, "NDCG@1000": 0.43261}, "cqadupstack_unix": {"NDCG@1": 0.27985, "NDCG@3": 0.3171, "NDCG@5": 0.33422, "NDCG@10": 0.35778, "NDCG@100": 0.41138, "NDCG@1000": 0.43812}, "cqadupstack_stats": {"NDCG@1": 0.23773, "NDCG@3": 0.28818, "NDCG@5": 0.30341, "NDCG@10": 0.32666, "NDCG@100": 0.36923, "NDCG@1000": 0.39845}, "fiqa": {"NDCG@1": 0.34105, "NDCG@3": 0.31934, "NDCG@5": 0.33612, "NDCG@10": 0.35736, "NDCG@100": 0.42409, "NDCG@1000": 0.45831}, "cqadupstack_wordpress": {"NDCG@1": 0.21627, "NDCG@3": 0.26489, "NDCG@5": 0.29248, "NDCG@10": 0.31721, "NDCG@100": 0.36279, "NDCG@1000": 0.39041}, "cqadupstack_tex": {"NDCG@1": 0.19718, "NDCG@3": 0.23505, "NDCG@5": 0.25141, "NDCG@10": 0.27375, "NDCG@100": 0.32004, "NDCG@1000": 0.35069}, "cqadupstack": {"NDCG@1": 0.2789491666666667, "NDCG@3": 0.32354999999999995, "NDCG@5": 0.3456300000000001, "NDCG@10": 0.3708525, "NDCG@100": 0.42121333333333333, "NDCG@1000": 0.4466591666666666}, "quora": {"NDCG@1": 0.6307, "NDCG@3": 0.69659, "NDCG@5": 0.72302, "NDCG@10": 0.74655, "NDCG@100": 0.77546, "NDCG@1000": 0.77864}, "trec-covid": {"NDCG@1": 0.88, "NDCG@3": 0.86877, "NDCG@5": 0.84621, "NDCG@10": 0.82731, "NDCG@100": 0.61742, "NDCG@1000": 0.52225}, "webis-touche2020": {"NDCG@1": 0.26531, "NDCG@3": 0.26197, "NDCG@5": 0.24561, "NDCG@10": 0.2365, "NDCG@100": 0.35251, "NDCG@1000": 0.46791}}}, "maps": {"sgpt-bloom-7b1-msmarco": {"scifact": {"MAP@1": 0.55661, "MAP@3": 0.6387, "MAP@5": 0.65383, "MAP@10": 0.66782, "MAP@100": 0.67334, "MAP@1000": 0.67348}, "nfcorpus": {"MAP@1": 0.0569, "MAP@3": 0.09625, "MAP@5": 0.11132, "MAP@10": 0.1309, "MAP@100": 0.16717, "MAP@1000": 0.18237}, "arguana": {"MAP@1": 0.23542, "MAP@3": 0.33345, "MAP@5": 0.36112, "MAP@10": 0.3848, "MAP@100": 0.3964, "MAP@1000": 0.39655}, "cqadupstack_webmasters": {"MAP@1": 0.23058, "MAP@3": 0.28613, "MAP@5": 0.29925, "MAP@10": 0.31308, 
"MAP@100": 0.32818, "MAP@1000": 0.33044}, "cqadupstack_android": {"MAP@1": 0.2696, "MAP@3": 0.32762, "MAP@5": 0.34832, "MAP@10": 0.36428, "MAP@100": 0.37719, "MAP@1000": 0.37843}, "cqadupstack_mathematica": {"MAP@1": 0.12403, "MAP@3": 0.17655, "MAP@5": 0.1919, "MAP@10": 0.20359, "MAP@100": 0.21506, "MAP@1000": 0.21639}, "cqadupstack_english": {"MAP@1": 0.29571, "MAP@3": 0.36207, "MAP@5": 0.37697, "MAP@10": 0.39027, "MAP@100": 0.40193, "MAP@1000": 0.40318}, "cqadupstack_gaming": {"MAP@1": 0.3551, "MAP@3": 0.43422, "MAP@5": 0.45209, "MAP@10": 0.4655, "MAP@100": 0.47634, "MAP@1000": 0.47699}, "scidocs": {"MAP@1": 0.04223, "MAP@3": 0.07854, "MAP@5": 0.09393, "MAP@10": 0.10847, "MAP@100": 0.12704, "MAP@1000": 0.12977}, "cqadupstack_programmers": {"MAP@1": 0.23765, "MAP@3": 0.2984, "MAP@5": 0.31543, "MAP@10": 0.32737, "MAP@100": 0.34121, "MAP@1000": 0.34252}, "cqadupstack_physics": {"MAP@1": 0.2679, "MAP@3": 0.32851, "MAP@5": 0.34813, "MAP@10": 0.36139, "MAP@100": 0.37356, "MAP@1000": 0.3749}, "cqadupstack_gis": {"MAP@1": 0.234, "MAP@3": 0.28982, "MAP@5": 0.30107, "MAP@10": 0.3118, "MAP@100": 0.3229, "MAP@1000": 0.32358}, "cqadupstack_unix": {"MAP@1": 0.24157, "MAP@3": 0.29199, "MAP@5": 0.30319, "MAP@10": 0.31382, "MAP@100": 0.32501, "MAP@1000": 0.32611}, "cqadupstack_stats": {"MAP@1": 0.21433, "MAP@3": 0.26445, "MAP@5": 0.27429, "MAP@10": 0.28455, "MAP@100": 0.29284, "MAP@1000": 0.29398}, "fiqa": {"MAP@1": 0.17039, "MAP@3": 0.24454, "MAP@5": 0.26793, "MAP@10": 0.28384, "MAP@100": 0.30107, "MAP@1000": 0.303}, "cqadupstack_wordpress": {"MAP@1": 0.20123, "MAP@3": 0.24563, "MAP@5": 0.26177, "MAP@10": 0.27227, "MAP@100": 0.28093, "MAP@1000": 0.28198}, "cqadupstack_tex": {"MAP@1": 0.16314, "MAP@3": 0.20877, "MAP@5": 0.21981, "MAP@10": 0.23003, "MAP@100": 0.23921, "MAP@1000": 0.24047}, "quora": {"MAP@1": 0.54941, "MAP@3": 0.65382, "MAP@5": 0.67572, "MAP@10": 0.69008, "MAP@100": 0.70003, "MAP@1000": 0.70036}, "trec-covid": {"MAP@1": 0.00251, "MAP@3": 0.00716, "MAP@5": 0.01121, "MAP@10": 0.02093, "MAP@100": 0.12252, "MAP@1000": 0.27831}, "webis-touche2020": {"MAP@1": 0.02322, "MAP@3": 0.04791, "MAP@5": 0.06072, "MAP@10": 0.08799, "MAP@100": 0.15053, "MAP@1000": 0.16628}}}, "recalls": {"sgpt-bloom-7b1-msmarco": {"scifact": {"Recall@1": 0.55661, "Recall@3": 0.72433, "Recall@5": 0.78361, "Recall@10": 0.858, "Recall@100": 0.96167, "Recall@1000": 1.0}, "nfcorpus": {"Recall@1": 0.0569, "Recall@3": 0.10864, "Recall@5": 0.13256, "Recall@10": 0.17061, "Recall@100": 0.33703, "Recall@1000": 0.66706}, "arguana": {"Recall@1": 0.23542, "Recall@3": 0.46159, "Recall@5": 0.58393, "Recall@10": 0.75605, "Recall@100": 0.97013, "Recall@1000": 0.99573}, "cqadupstack_webmasters": {"Recall@1": 0.23058, "Recall@3": 0.34204, "Recall@5": 0.38728, "Recall@10": 0.47347, "Recall@100": 0.72013, "Recall@1000": 0.92319}, "cqadupstack_android": {"Recall@1": 0.2696, "Recall@3": 0.38594, "Recall@5": 0.45881, "Recall@10": 0.54882, "Recall@100": 0.77388, "Recall@1000": 0.92183}, "cqadupstack_mathematica": {"Recall@1": 0.12403, "Recall@3": 0.23551, "Recall@5": 0.2988, "Recall@10": 0.3762, "Recall@100": 0.62595, "Recall@1000": 0.85285}, "cqadupstack_english": {"Recall@1": 0.29571, "Recall@3": 0.41956, "Recall@5": 0.46965, "Recall@10": 0.53965, "Recall@100": 0.7261, "Recall@1000": 0.85665}, "cqadupstack_gaming": {"Recall@1": 0.3551, "Recall@3": 0.51142, "Recall@5": 0.58008, "Recall@10": 0.65914, "Recall@100": 0.84275, "Recall@1000": 0.94272}, "scidocs": {"Recall@1": 0.04223, "Recall@3": 0.10038, "Recall@5": 0.13857, "Recall@10": 0.19287, 
"Recall@100": 0.40922, "Recall@1000": 0.65665}, "cqadupstack_programmers": {"Recall@1": 0.23765, "Recall@3": 0.35897, "Recall@5": 0.42129, "Recall@10": 0.49281, "Recall@100": 0.74021, "Recall@1000": 0.92297}, "cqadupstack_physics": {"Recall@1": 0.2679, "Recall@3": 0.39176, "Recall@5": 0.4631, "Recall@10": 0.54045, "Recall@100": 0.76192, "Recall@1000": 0.92081}, "cqadupstack_gis": {"Recall@1": 0.234, "Recall@3": 0.356, "Recall@5": 0.40299, "Recall@10": 0.47713, "Recall@100": 0.73541, "Recall@1000": 0.88709}, "cqadupstack_unix": {"Recall@1": 0.24157, "Recall@3": 0.34361, "Recall@5": 0.38646, "Recall@10": 0.45523, "Recall@100": 0.69103, "Recall@1000": 0.88251}, "cqadupstack_stats": {"Recall@1": 0.21433, "Recall@3": 0.32467, "Recall@5": 0.36117, "Recall@10": 0.42994, "Recall@100": 0.62737, "Recall@1000": 0.84587}, "fiqa": {"Recall@1": 0.17039, "Recall@3": 0.29267, "Recall@5": 0.35873, "Recall@10": 0.42707, "Recall@100": 0.67557, "Recall@1000": 0.88364}, "cqadupstack_wordpress": {"Recall@1": 0.20123, "Recall@3": 0.29901, "Recall@5": 0.36537, "Recall@10": 0.43971, "Recall@100": 0.65135, "Recall@1000": 0.86095}, "cqadupstack_tex": {"Recall@1": 0.16314, "Recall@3": 0.26079, "Recall@5": 0.30313, "Recall@10": 0.36994, "Recall@100": 0.58138, "Recall@1000": 0.79975}, "quora": {"Recall@1": 0.54941, "Recall@3": 0.73605, "Recall@5": 0.8037, "Recall@10": 0.87129, "Recall@100": 0.98129, "Recall@1000": 0.99825}, "trec-covid": {"Recall@1": 0.00251, "Recall@3": 0.00738, "Recall@5": 0.01163, "Recall@10": 0.02236, "Recall@100": 0.15228, "Recall@1000": 0.48675}, "webis-touche2020": {"Recall@1": 0.02322, "Recall@3": 0.06065, "Recall@5": 0.08608, "Recall@10": 0.15224, "Recall@100": 0.45549, "Recall@1000": 0.80094}}}, "precisions": {"sgpt-bloom-7b1-msmarco": {"scifact": {"P@1": 0.59, "P@3": 0.26444, "P@5": 0.174, "P@10": 0.09733, "P@100": 0.0109, "P@1000": 0.00113}, "nfcorpus": {"P@1": 0.47988, "P@3": 0.38803, "P@5": 0.33994, "P@10": 0.26749, "P@100": 0.08514, "P@1000": 0.0213}, "arguana": {"P@1": 0.23542, "P@3": 0.15386, "P@5": 0.11679, "P@10": 0.0756, "P@100": 0.0097, "P@1000": 0.001}, "cqadupstack_webmasters": {"P@1": 0.27668, "P@3": 0.14822, "P@5": 0.10909, "P@10": 0.07016, "P@100": 0.01437, "P@1000": 0.0023}, "cqadupstack_android": {"P@1": 0.33476, "P@3": 0.17263, "P@5": 0.1279, "P@10": 0.08083, "P@100": 0.01303, "P@1000": 0.00176}, "cqadupstack_mathematica": {"P@1": 0.15796, "P@3": 0.10075, "P@5": 0.07786, "P@10": 0.0505, "P@100": 0.00917, "P@1000": 0.00133}, "cqadupstack_english": {"P@1": 0.37325, "P@3": 0.19427, "P@5": 0.13682, "P@10": 0.08363, "P@100": 0.01347, "P@1000": 0.00182}, "cqadupstack_gaming": {"P@1": 0.40313, "P@3": 0.20564, "P@5": 0.14408, "P@10": 0.08401, "P@100": 0.01147, "P@1000": 0.00132}, "scidocs": {"P@1": 0.208, "P@3": 0.165, "P@5": 0.1362, "P@10": 0.095, "P@100": 0.02015, "P@1000": 0.00323}, "cqadupstack_programmers": {"P@1": 0.2968, "P@3": 0.16096, "P@5": 0.11598, "P@10": 0.07043, "P@100": 0.01162, "P@1000": 0.00158}, "cqadupstack_physics": {"P@1": 0.31954, "P@3": 0.17036, "P@5": 0.12589, "P@10": 0.07719, "P@100": 0.01207, "P@1000": 0.0016}, "cqadupstack_gis": {"P@1": 0.25424, "P@3": 0.13371, "P@5": 0.09175, "P@10": 0.05469, "P@100": 0.00877, "P@1000": 0.00108}, "cqadupstack_unix": {"P@1": 0.27985, "P@3": 0.13993, "P@5": 0.0972, "P@10": 0.05802, "P@100": 0.0096, "P@1000": 0.0013}, "cqadupstack_stats": {"P@1": 0.23773, "P@3": 0.12423, "P@5": 0.08528, "P@10": 0.0523, "P@100": 0.00802, "P@1000": 0.00112}, "fiqa": {"P@1": 0.34105, "P@3": 0.21399, "P@5": 0.16296, "P@10": 0.09985, "P@100": 
0.01694, "P@1000": 0.00229}, "cqadupstack_wordpress": {"P@1": 0.21627, "P@3": 0.11337, "P@5": 0.08392, "P@10": 0.05046, "P@100": 0.00793, "P@1000": 0.00111}, "cqadupstack_tex": {"P@1": 0.19718, "P@3": 0.11023, "P@5": 0.07915, "P@10": 0.04917, "P@100": 0.00837, "P@1000": 0.00127}, "quora": {"P@1": 0.6307, "P@3": 0.30727, "P@5": 0.20858, "P@10": 0.11841, "P@100": 0.01481, "P@1000": 0.00156}, "trec-covid": {"P@1": 0.92, "P@3": 0.91333, "P@5": 0.884, "P@10": 0.864, "P@100": 0.6364, "P@1000": 0.23266}, "webis-touche2020": {"P@1": 0.28571, "P@3": 0.27891, "P@5": 0.24898, "P@10": 0.21633, "P@100": 0.07327, "P@1000": 0.01496}}}}
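The evaluation file nests metric → model → dataset → cutoff. A sketch that averages NDCG@10 across the BEIR datasets, skipping the per-forum `cqadupstack_*` entries because the aggregated `cqadupstack` score is already present:

```python
# Sketch: read evaluation/beir.json and average NDCG@10 across datasets.
import json

with open("evaluation/beir.json") as f:
    ndcgs = json.load(f)["ndcgs"]["sgpt-bloom-7b1-msmarco"]

scores = {name: metrics["NDCG@10"]
          for name, metrics in ndcgs.items()
          if not name.startswith("cqadupstack_")}   # keep only the aggregate
print(f"mean NDCG@10 over {len(scores)} datasets: {sum(scores.values()) / len(scores):.4f}")
```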
modules.json ADDED
@@ -0,0 +1,14 @@
+ [
+   {
+     "idx": 0,
+     "name": "0",
+     "path": "",
+     "type": "sentence_transformers.models.Transformer"
+   },
+   {
+     "idx": 1,
+     "name": "1",
+     "path": "1_Pooling",
+     "type": "sentence_transformers.models.Pooling"
+   }
+ ]
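`modules.json` chains two modules: a Transformer loaded from the repo root and the Pooling module from `1_Pooling/`. A rough manual equivalent (a sketch; the `pooling_mode_weightedmean_tokens` argument needs a sentence-transformers release newer than the 2.1.0 recorded above, and `{MODEL_NAME}` is the README's placeholder):

```python
# Sketch: roughly what modules.json wires together, assembled by hand.
# The Pooling arguments mirror 1_Pooling/config.json.
from sentence_transformers import SentenceTransformer, models

word = models.Transformer('{MODEL_NAME}', max_seq_length=300)  # module "0", path ""
pooling = models.Pooling(
    word.get_word_embedding_dimension(),      # 4096
    pooling_mode_mean_tokens=False,
    pooling_mode_weightedmean_tokens=True,    # module "1", path "1_Pooling"
)
model = SentenceTransformer(modules=[word, pooling])
```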
pytorch_model-00001-of-00003.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7aa0368f5635d9558fff1652236f8e16dccc2f65aaa7320d3decc7d559eda631
+ size 9947280444
pytorch_model-00002-of-00003.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2350a568a6491d756f271357ba5147c86d4e0ebe5d83b97156218d9d5366c835
+ size 9733438429
pytorch_model-00003-of-00003.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:75f5c706e045b84e7b54c554eae2da0f02fea5870a19e943997cc567849a1e9d
+ size 8592219901
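Each shard above is stored as a Git LFS pointer (a version line, a sha256 oid, and the byte size); the roughly 28 GB of actual weights live in LFS storage. A sketch parsing such a pointer, assuming the repo was cloned with `GIT_LFS_SKIP_SMUDGE=1` so the `.bin` files are still pointers rather than downloaded weights:

```python
# Sketch: parse a Git LFS pointer file like the three above into a dict.
def parse_lfs_pointer(path: str) -> dict:
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    fields["size"] = int(fields["size"])
    return fields

ptr = parse_lfs_pointer("pytorch_model-00001-of-00003.bin")
print(ptr["oid"], ptr["size"])  # sha256:7aa0... 9947280444
```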
pytorch_model.bin.index.json ADDED
@@ -0,0 +1,372 @@
+ {
+   "metadata": {
+     "total_size": 28272820224
+   },
+   "weight_map": {
+     "h.0.input_layernorm.bias": "pytorch_model-00001-of-00003.bin",
+     "h.0.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
+     "h.0.mlp.dense_4h_to_h.bias": "pytorch_model-00001-of-00003.bin",
+     "h.0.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00003.bin",
+     "h.0.mlp.dense_h_to_4h.bias": "pytorch_model-00001-of-00003.bin",
+     "h.0.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00003.bin",
+     "h.0.post_attention_layernorm.bias": "pytorch_model-00001-of-00003.bin",
+     "h.0.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
+     "h.0.self_attention.dense.bias": "pytorch_model-00001-of-00003.bin",
+     "h.0.self_attention.dense.weight": "pytorch_model-00001-of-00003.bin",
+     "h.0.self_attention.query_key_value.bias": "pytorch_model-00001-of-00003.bin",
+     "h.0.self_attention.query_key_value.weight": "pytorch_model-00001-of-00003.bin",
+     "h.1.input_layernorm.bias": "pytorch_model-00001-of-00003.bin",
+     "h.1.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
+     "h.1.mlp.dense_4h_to_h.bias": "pytorch_model-00001-of-00003.bin",
+     "h.1.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00003.bin",
+     "h.1.mlp.dense_h_to_4h.bias": "pytorch_model-00001-of-00003.bin",
+     "h.1.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00003.bin",
+     "h.1.post_attention_layernorm.bias": "pytorch_model-00001-of-00003.bin",
+     "h.1.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
+     "h.1.self_attention.dense.bias": "pytorch_model-00001-of-00003.bin",
+     "h.1.self_attention.dense.weight": "pytorch_model-00001-of-00003.bin",
+     "h.1.self_attention.query_key_value.bias": "pytorch_model-00001-of-00003.bin",
+     "h.1.self_attention.query_key_value.weight": "pytorch_model-00001-of-00003.bin",
+     "h.10.input_layernorm.bias": "pytorch_model-00002-of-00003.bin",
+     "h.10.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
+     "h.10.mlp.dense_4h_to_h.bias": "pytorch_model-00002-of-00003.bin",
+     "h.10.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00003.bin",
+     "h.10.mlp.dense_h_to_4h.bias": "pytorch_model-00002-of-00003.bin",
+     "h.10.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00003.bin",
+     "h.10.post_attention_layernorm.bias": "pytorch_model-00002-of-00003.bin",
+     "h.10.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
+     "h.10.self_attention.dense.bias": "pytorch_model-00002-of-00003.bin",
+     "h.10.self_attention.dense.weight": "pytorch_model-00002-of-00003.bin",
+     "h.10.self_attention.query_key_value.bias": "pytorch_model-00002-of-00003.bin",
+     "h.10.self_attention.query_key_value.weight": "pytorch_model-00002-of-00003.bin",
+     "h.11.input_layernorm.bias": "pytorch_model-00002-of-00003.bin",
+     "h.11.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
+     "h.11.mlp.dense_4h_to_h.bias": "pytorch_model-00002-of-00003.bin",
+     "h.11.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00003.bin",
+     "h.11.mlp.dense_h_to_4h.bias": "pytorch_model-00002-of-00003.bin",
+     "h.11.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00003.bin",
+     "h.11.post_attention_layernorm.bias": "pytorch_model-00002-of-00003.bin",
+     "h.11.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
+     "h.11.self_attention.dense.bias": "pytorch_model-00002-of-00003.bin",
+     "h.11.self_attention.dense.weight": "pytorch_model-00002-of-00003.bin",
+     "h.11.self_attention.query_key_value.bias": "pytorch_model-00002-of-00003.bin",
+     "h.11.self_attention.query_key_value.weight": "pytorch_model-00002-of-00003.bin",
+     "h.12.input_layernorm.bias": "pytorch_model-00002-of-00003.bin",
+     "h.12.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
+     "h.12.mlp.dense_4h_to_h.bias": "pytorch_model-00002-of-00003.bin",
+     "h.12.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00003.bin",
+     "h.12.mlp.dense_h_to_4h.bias": "pytorch_model-00002-of-00003.bin",
+     "h.12.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00003.bin",
+     "h.12.post_attention_layernorm.bias": "pytorch_model-00002-of-00003.bin",
+     "h.12.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
+     "h.12.self_attention.dense.bias": "pytorch_model-00002-of-00003.bin",
+     "h.12.self_attention.dense.weight": "pytorch_model-00002-of-00003.bin",
+     "h.12.self_attention.query_key_value.bias": "pytorch_model-00002-of-00003.bin",
+     "h.12.self_attention.query_key_value.weight": "pytorch_model-00002-of-00003.bin",
+     "h.13.input_layernorm.bias": "pytorch_model-00002-of-00003.bin",
+     "h.13.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
+     "h.13.mlp.dense_4h_to_h.bias": "pytorch_model-00002-of-00003.bin",
+     "h.13.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00003.bin",
+     "h.13.mlp.dense_h_to_4h.bias": "pytorch_model-00002-of-00003.bin",
+     "h.13.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00003.bin",
+     "h.13.post_attention_layernorm.bias": "pytorch_model-00002-of-00003.bin",
+     "h.13.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
+     "h.13.self_attention.dense.bias": "pytorch_model-00002-of-00003.bin",
+     "h.13.self_attention.dense.weight": "pytorch_model-00002-of-00003.bin",
+     "h.13.self_attention.query_key_value.bias": "pytorch_model-00002-of-00003.bin",
+     "h.13.self_attention.query_key_value.weight": "pytorch_model-00002-of-00003.bin",
+     "h.14.input_layernorm.bias": "pytorch_model-00002-of-00003.bin",
+     "h.14.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
+     "h.14.mlp.dense_4h_to_h.bias": "pytorch_model-00002-of-00003.bin",
+     "h.14.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00003.bin",
+     "h.14.mlp.dense_h_to_4h.bias": "pytorch_model-00002-of-00003.bin",
+     "h.14.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00003.bin",
+     "h.14.post_attention_layernorm.bias": "pytorch_model-00002-of-00003.bin",
+     "h.14.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
+     "h.14.self_attention.dense.bias": "pytorch_model-00002-of-00003.bin",
+     "h.14.self_attention.dense.weight": "pytorch_model-00002-of-00003.bin",
+     "h.14.self_attention.query_key_value.bias": "pytorch_model-00002-of-00003.bin",
+     "h.14.self_attention.query_key_value.weight": "pytorch_model-00002-of-00003.bin",
+     "h.15.input_layernorm.bias": "pytorch_model-00002-of-00003.bin",
+     "h.15.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
+     "h.15.mlp.dense_4h_to_h.bias": "pytorch_model-00002-of-00003.bin",
+     "h.15.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00003.bin",
+     "h.15.mlp.dense_h_to_4h.bias": "pytorch_model-00002-of-00003.bin",
+     "h.15.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00003.bin",
+     "h.15.post_attention_layernorm.bias": "pytorch_model-00002-of-00003.bin",
+     "h.15.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
+     "h.15.self_attention.dense.bias": "pytorch_model-00002-of-00003.bin",
+     "h.15.self_attention.dense.weight": "pytorch_model-00002-of-00003.bin",
+     "h.15.self_attention.query_key_value.bias": "pytorch_model-00002-of-00003.bin",
+     "h.15.self_attention.query_key_value.weight": "pytorch_model-00002-of-00003.bin",
+     "h.16.input_layernorm.bias": "pytorch_model-00002-of-00003.bin",
+     "h.16.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
+     "h.16.mlp.dense_4h_to_h.bias": "pytorch_model-00002-of-00003.bin",
+     "h.16.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00003.bin",
+     "h.16.mlp.dense_h_to_4h.bias": "pytorch_model-00002-of-00003.bin",
+     "h.16.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00003.bin",
+     "h.16.post_attention_layernorm.bias": "pytorch_model-00002-of-00003.bin",
+     "h.16.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
+     "h.16.self_attention.dense.bias": "pytorch_model-00002-of-00003.bin",
+     "h.16.self_attention.dense.weight": "pytorch_model-00002-of-00003.bin",
+     "h.16.self_attention.query_key_value.bias": "pytorch_model-00002-of-00003.bin",
+     "h.16.self_attention.query_key_value.weight": "pytorch_model-00002-of-00003.bin",
+     "h.17.input_layernorm.bias": "pytorch_model-00002-of-00003.bin",
+     "h.17.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
+     "h.17.mlp.dense_4h_to_h.bias": "pytorch_model-00002-of-00003.bin",
+     "h.17.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00003.bin",
+     "h.17.mlp.dense_h_to_4h.bias": "pytorch_model-00002-of-00003.bin",
+     "h.17.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00003.bin",
+     "h.17.post_attention_layernorm.bias": "pytorch_model-00002-of-00003.bin",
+     "h.17.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
+     "h.17.self_attention.dense.bias": "pytorch_model-00002-of-00003.bin",
+     "h.17.self_attention.dense.weight": "pytorch_model-00002-of-00003.bin",
+     "h.17.self_attention.query_key_value.bias": "pytorch_model-00002-of-00003.bin",
+     "h.17.self_attention.query_key_value.weight": "pytorch_model-00002-of-00003.bin",
+     "h.18.input_layernorm.bias": "pytorch_model-00002-of-00003.bin",
+     "h.18.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
+     "h.18.mlp.dense_4h_to_h.bias": "pytorch_model-00002-of-00003.bin",
+     "h.18.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00003.bin",
+     "h.18.mlp.dense_h_to_4h.bias": "pytorch_model-00002-of-00003.bin",
+     "h.18.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00003.bin",
+     "h.18.post_attention_layernorm.bias": "pytorch_model-00002-of-00003.bin",
+     "h.18.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
+     "h.18.self_attention.dense.bias": "pytorch_model-00002-of-00003.bin",
+     "h.18.self_attention.dense.weight": "pytorch_model-00002-of-00003.bin",
+     "h.18.self_attention.query_key_value.bias": "pytorch_model-00002-of-00003.bin",
+     "h.18.self_attention.query_key_value.weight": "pytorch_model-00002-of-00003.bin",
+     "h.19.input_layernorm.bias": "pytorch_model-00002-of-00003.bin",
+     "h.19.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
+     "h.19.mlp.dense_4h_to_h.bias": "pytorch_model-00003-of-00003.bin",
+     "h.19.mlp.dense_4h_to_h.weight": "pytorch_model-00003-of-00003.bin",
+     "h.19.mlp.dense_h_to_4h.bias": "pytorch_model-00003-of-00003.bin",
+     "h.19.mlp.dense_h_to_4h.weight": "pytorch_model-00003-of-00003.bin",
+     "h.19.post_attention_layernorm.bias": "pytorch_model-00002-of-00003.bin",
+     "h.19.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
+     "h.19.self_attention.dense.bias": "pytorch_model-00002-of-00003.bin",
+     "h.19.self_attention.dense.weight": "pytorch_model-00002-of-00003.bin",
+     "h.19.self_attention.query_key_value.bias": "pytorch_model-00002-of-00003.bin",
+     "h.19.self_attention.query_key_value.weight": "pytorch_model-00002-of-00003.bin",
+     "h.2.input_layernorm.bias": "pytorch_model-00001-of-00003.bin",
+     "h.2.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
+     "h.2.mlp.dense_4h_to_h.bias": "pytorch_model-00001-of-00003.bin",
+     "h.2.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00003.bin",
+     "h.2.mlp.dense_h_to_4h.bias": "pytorch_model-00001-of-00003.bin",
+     "h.2.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00003.bin",
+     "h.2.post_attention_layernorm.bias": "pytorch_model-00001-of-00003.bin",
+     "h.2.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
+     "h.2.self_attention.dense.bias": "pytorch_model-00001-of-00003.bin",
+     "h.2.self_attention.dense.weight": "pytorch_model-00001-of-00003.bin",
+     "h.2.self_attention.query_key_value.bias": "pytorch_model-00001-of-00003.bin",
+     "h.2.self_attention.query_key_value.weight": "pytorch_model-00001-of-00003.bin",
+     "h.20.input_layernorm.bias": "pytorch_model-00003-of-00003.bin",
+     "h.20.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
+     "h.20.mlp.dense_4h_to_h.bias": "pytorch_model-00003-of-00003.bin",
+     "h.20.mlp.dense_4h_to_h.weight": "pytorch_model-00003-of-00003.bin",
+     "h.20.mlp.dense_h_to_4h.bias": "pytorch_model-00003-of-00003.bin",
+     "h.20.mlp.dense_h_to_4h.weight": "pytorch_model-00003-of-00003.bin",
+     "h.20.post_attention_layernorm.bias": "pytorch_model-00003-of-00003.bin",
+     "h.20.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
+     "h.20.self_attention.dense.bias": "pytorch_model-00003-of-00003.bin",
+     "h.20.self_attention.dense.weight": "pytorch_model-00003-of-00003.bin",
+     "h.20.self_attention.query_key_value.bias": "pytorch_model-00003-of-00003.bin",
+     "h.20.self_attention.query_key_value.weight": "pytorch_model-00003-of-00003.bin",
+     "h.21.input_layernorm.bias": "pytorch_model-00003-of-00003.bin",
+     "h.21.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
+     "h.21.mlp.dense_4h_to_h.bias": "pytorch_model-00003-of-00003.bin",
+     "h.21.mlp.dense_4h_to_h.weight": "pytorch_model-00003-of-00003.bin",
+     "h.21.mlp.dense_h_to_4h.bias": "pytorch_model-00003-of-00003.bin",
+     "h.21.mlp.dense_h_to_4h.weight": "pytorch_model-00003-of-00003.bin",
+     "h.21.post_attention_layernorm.bias": "pytorch_model-00003-of-00003.bin",
+     "h.21.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
+     "h.21.self_attention.dense.bias": "pytorch_model-00003-of-00003.bin",
+     "h.21.self_attention.dense.weight": "pytorch_model-00003-of-00003.bin",
+     "h.21.self_attention.query_key_value.bias": "pytorch_model-00003-of-00003.bin",
+     "h.21.self_attention.query_key_value.weight": "pytorch_model-00003-of-00003.bin",
+     "h.22.input_layernorm.bias": "pytorch_model-00003-of-00003.bin",
+     "h.22.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
+     "h.22.mlp.dense_4h_to_h.bias": "pytorch_model-00003-of-00003.bin",
+     "h.22.mlp.dense_4h_to_h.weight": "pytorch_model-00003-of-00003.bin",
+     "h.22.mlp.dense_h_to_4h.bias": "pytorch_model-00003-of-00003.bin",
+     "h.22.mlp.dense_h_to_4h.weight": "pytorch_model-00003-of-00003.bin",
+     "h.22.post_attention_layernorm.bias": "pytorch_model-00003-of-00003.bin",
+     "h.22.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
+     "h.22.self_attention.dense.bias": "pytorch_model-00003-of-00003.bin",
+     "h.22.self_attention.dense.weight": "pytorch_model-00003-of-00003.bin",
+     "h.22.self_attention.query_key_value.bias": "pytorch_model-00003-of-00003.bin",
+     "h.22.self_attention.query_key_value.weight": "pytorch_model-00003-of-00003.bin",
+     "h.23.input_layernorm.bias": "pytorch_model-00003-of-00003.bin",
+     "h.23.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
+     "h.23.mlp.dense_4h_to_h.bias": "pytorch_model-00003-of-00003.bin",
+     "h.23.mlp.dense_4h_to_h.weight": "pytorch_model-00003-of-00003.bin",
+     "h.23.mlp.dense_h_to_4h.bias": "pytorch_model-00003-of-00003.bin",
+     "h.23.mlp.dense_h_to_4h.weight": "pytorch_model-00003-of-00003.bin",
+     "h.23.post_attention_layernorm.bias": "pytorch_model-00003-of-00003.bin",
+     "h.23.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
+     "h.23.self_attention.dense.bias": "pytorch_model-00003-of-00003.bin",
+     "h.23.self_attention.dense.weight": "pytorch_model-00003-of-00003.bin",
+     "h.23.self_attention.query_key_value.bias": "pytorch_model-00003-of-00003.bin",
+     "h.23.self_attention.query_key_value.weight": "pytorch_model-00003-of-00003.bin",
+     "h.24.input_layernorm.bias": "pytorch_model-00003-of-00003.bin",
+     "h.24.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
+     "h.24.mlp.dense_4h_to_h.bias": "pytorch_model-00003-of-00003.bin",
+     "h.24.mlp.dense_4h_to_h.weight": "pytorch_model-00003-of-00003.bin",
+     "h.24.mlp.dense_h_to_4h.bias": "pytorch_model-00003-of-00003.bin",
+     "h.24.mlp.dense_h_to_4h.weight": "pytorch_model-00003-of-00003.bin",
+     "h.24.post_attention_layernorm.bias": "pytorch_model-00003-of-00003.bin",
+     "h.24.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
+     "h.24.self_attention.dense.bias": "pytorch_model-00003-of-00003.bin",
+     "h.24.self_attention.dense.weight": "pytorch_model-00003-of-00003.bin",
+     "h.24.self_attention.query_key_value.bias": "pytorch_model-00003-of-00003.bin",
+     "h.24.self_attention.query_key_value.weight": "pytorch_model-00003-of-00003.bin",
+     "h.25.input_layernorm.bias": "pytorch_model-00003-of-00003.bin",
+     "h.25.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
+     "h.25.mlp.dense_4h_to_h.bias": "pytorch_model-00003-of-00003.bin",
+     "h.25.mlp.dense_4h_to_h.weight": "pytorch_model-00003-of-00003.bin",
+     "h.25.mlp.dense_h_to_4h.bias": "pytorch_model-00003-of-00003.bin",
+     "h.25.mlp.dense_h_to_4h.weight": "pytorch_model-00003-of-00003.bin",
+     "h.25.post_attention_layernorm.bias": "pytorch_model-00003-of-00003.bin",
+     "h.25.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
+     "h.25.self_attention.dense.bias": "pytorch_model-00003-of-00003.bin",
+     "h.25.self_attention.dense.weight": "pytorch_model-00003-of-00003.bin",
+     "h.25.self_attention.query_key_value.bias": "pytorch_model-00003-of-00003.bin",
+     "h.25.self_attention.query_key_value.weight": "pytorch_model-00003-of-00003.bin",
+     "h.26.input_layernorm.bias": "pytorch_model-00003-of-00003.bin",
+     "h.26.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
+     "h.26.mlp.dense_4h_to_h.bias": "pytorch_model-00003-of-00003.bin",
+     "h.26.mlp.dense_4h_to_h.weight": "pytorch_model-00003-of-00003.bin",
+     "h.26.mlp.dense_h_to_4h.bias": "pytorch_model-00003-of-00003.bin",
+     "h.26.mlp.dense_h_to_4h.weight": "pytorch_model-00003-of-00003.bin",
+     "h.26.post_attention_layernorm.bias": "pytorch_model-00003-of-00003.bin",
+     "h.26.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
+     "h.26.self_attention.dense.bias": "pytorch_model-00003-of-00003.bin",
+     "h.26.self_attention.dense.weight": "pytorch_model-00003-of-00003.bin",
+     "h.26.self_attention.query_key_value.bias": "pytorch_model-00003-of-00003.bin",
+     "h.26.self_attention.query_key_value.weight": "pytorch_model-00003-of-00003.bin",
+     "h.27.input_layernorm.bias": "pytorch_model-00003-of-00003.bin",
+     "h.27.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
+     "h.27.mlp.dense_4h_to_h.bias": "pytorch_model-00003-of-00003.bin",
+     "h.27.mlp.dense_4h_to_h.weight": "pytorch_model-00003-of-00003.bin",
+     "h.27.mlp.dense_h_to_4h.bias": "pytorch_model-00003-of-00003.bin",
+     "h.27.mlp.dense_h_to_4h.weight": "pytorch_model-00003-of-00003.bin",
+     "h.27.post_attention_layernorm.bias": "pytorch_model-00003-of-00003.bin",
+     "h.27.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
+     "h.27.self_attention.dense.bias": "pytorch_model-00003-of-00003.bin",
+     "h.27.self_attention.dense.weight": "pytorch_model-00003-of-00003.bin",
+     "h.27.self_attention.query_key_value.bias": "pytorch_model-00003-of-00003.bin",
+     "h.27.self_attention.query_key_value.weight": "pytorch_model-00003-of-00003.bin",
+     "h.28.input_layernorm.bias": "pytorch_model-00003-of-00003.bin",
+     "h.28.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
+     "h.28.mlp.dense_4h_to_h.bias": "pytorch_model-00003-of-00003.bin",
+     "h.28.mlp.dense_4h_to_h.weight": "pytorch_model-00003-of-00003.bin",
+     "h.28.mlp.dense_h_to_4h.bias": "pytorch_model-00003-of-00003.bin",
+     "h.28.mlp.dense_h_to_4h.weight": "pytorch_model-00003-of-00003.bin",
+     "h.28.post_attention_layernorm.bias": "pytorch_model-00003-of-00003.bin",
+     "h.28.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
+     "h.28.self_attention.dense.bias": "pytorch_model-00003-of-00003.bin",
+     "h.28.self_attention.dense.weight": "pytorch_model-00003-of-00003.bin",
+     "h.28.self_attention.query_key_value.bias": "pytorch_model-00003-of-00003.bin",
+     "h.28.self_attention.query_key_value.weight": "pytorch_model-00003-of-00003.bin",
+     "h.29.input_layernorm.bias": "pytorch_model-00003-of-00003.bin",
+     "h.29.input_layernorm.weight": "pytorch_model-00003-of-00003.bin",
+     "h.29.mlp.dense_4h_to_h.bias": "pytorch_model-00003-of-00003.bin",
+     "h.29.mlp.dense_4h_to_h.weight": "pytorch_model-00003-of-00003.bin",
+     "h.29.mlp.dense_h_to_4h.bias": "pytorch_model-00003-of-00003.bin",
+     "h.29.mlp.dense_h_to_4h.weight": "pytorch_model-00003-of-00003.bin",
+     "h.29.post_attention_layernorm.bias": "pytorch_model-00003-of-00003.bin",
+     "h.29.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin",
+     "h.29.self_attention.dense.bias": "pytorch_model-00003-of-00003.bin",
+     "h.29.self_attention.dense.weight": "pytorch_model-00003-of-00003.bin",
+     "h.29.self_attention.query_key_value.bias": "pytorch_model-00003-of-00003.bin",
+     "h.29.self_attention.query_key_value.weight": "pytorch_model-00003-of-00003.bin",
+     "h.3.input_layernorm.bias": "pytorch_model-00001-of-00003.bin",
+     "h.3.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
+     "h.3.mlp.dense_4h_to_h.bias": "pytorch_model-00001-of-00003.bin",
+     "h.3.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00003.bin",
+     "h.3.mlp.dense_h_to_4h.bias": "pytorch_model-00001-of-00003.bin",
+     "h.3.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00003.bin",
+     "h.3.post_attention_layernorm.bias": "pytorch_model-00001-of-00003.bin",
+     "h.3.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
+     "h.3.self_attention.dense.bias": "pytorch_model-00001-of-00003.bin",
+     "h.3.self_attention.dense.weight": "pytorch_model-00001-of-00003.bin",
+     "h.3.self_attention.query_key_value.bias": "pytorch_model-00001-of-00003.bin",
+     "h.3.self_attention.query_key_value.weight": "pytorch_model-00001-of-00003.bin",
+     "h.4.input_layernorm.bias": "pytorch_model-00001-of-00003.bin",
+     "h.4.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
+     "h.4.mlp.dense_4h_to_h.bias": "pytorch_model-00001-of-00003.bin",
+     "h.4.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00003.bin",
+     "h.4.mlp.dense_h_to_4h.bias": "pytorch_model-00001-of-00003.bin",
+     "h.4.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00003.bin",
+     "h.4.post_attention_layernorm.bias": "pytorch_model-00001-of-00003.bin",
+     "h.4.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
+     "h.4.self_attention.dense.bias": "pytorch_model-00001-of-00003.bin",
+     "h.4.self_attention.dense.weight": "pytorch_model-00001-of-00003.bin",
+     "h.4.self_attention.query_key_value.bias": "pytorch_model-00001-of-00003.bin",
+     "h.4.self_attention.query_key_value.weight": "pytorch_model-00001-of-00003.bin",
+     "h.5.input_layernorm.bias": "pytorch_model-00001-of-00003.bin",
+     "h.5.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
+     "h.5.mlp.dense_4h_to_h.bias": "pytorch_model-00001-of-00003.bin",
+     "h.5.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00003.bin",
+     "h.5.mlp.dense_h_to_4h.bias": "pytorch_model-00001-of-00003.bin",
+     "h.5.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00003.bin",
+     "h.5.post_attention_layernorm.bias": "pytorch_model-00001-of-00003.bin",
+     "h.5.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
+     "h.5.self_attention.dense.bias": "pytorch_model-00001-of-00003.bin",
+     "h.5.self_attention.dense.weight": "pytorch_model-00001-of-00003.bin",
+     "h.5.self_attention.query_key_value.bias": "pytorch_model-00001-of-00003.bin",
+     "h.5.self_attention.query_key_value.weight": "pytorch_model-00001-of-00003.bin",
+     "h.6.input_layernorm.bias": "pytorch_model-00001-of-00003.bin",
+     "h.6.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
+     "h.6.mlp.dense_4h_to_h.bias": "pytorch_model-00001-of-00003.bin",
+     "h.6.mlp.dense_4h_to_h.weight": "pytorch_model-00001-of-00003.bin",
+     "h.6.mlp.dense_h_to_4h.bias": "pytorch_model-00001-of-00003.bin",
+     "h.6.mlp.dense_h_to_4h.weight": "pytorch_model-00001-of-00003.bin",
+     "h.6.post_attention_layernorm.bias": "pytorch_model-00001-of-00003.bin",
+     "h.6.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin",
+     "h.6.self_attention.dense.bias": "pytorch_model-00001-of-00003.bin",
+     "h.6.self_attention.dense.weight": "pytorch_model-00001-of-00003.bin",
+     "h.6.self_attention.query_key_value.bias": "pytorch_model-00001-of-00003.bin",
+     "h.6.self_attention.query_key_value.weight": "pytorch_model-00001-of-00003.bin",
+     "h.7.input_layernorm.bias": "pytorch_model-00001-of-00003.bin",
+     "h.7.input_layernorm.weight": "pytorch_model-00001-of-00003.bin",
+     "h.7.mlp.dense_4h_to_h.bias": "pytorch_model-00002-of-00003.bin",
+     "h.7.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00003.bin",
+     "h.7.mlp.dense_h_to_4h.bias": "pytorch_model-00002-of-00003.bin",
+     "h.7.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00003.bin",
+     "h.7.post_attention_layernorm.bias": "pytorch_model-00002-of-00003.bin",
+     "h.7.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
+     "h.7.self_attention.dense.bias": "pytorch_model-00002-of-00003.bin",
+     "h.7.self_attention.dense.weight": "pytorch_model-00002-of-00003.bin",
+     "h.7.self_attention.query_key_value.bias": "pytorch_model-00001-of-00003.bin",
+     "h.7.self_attention.query_key_value.weight": "pytorch_model-00001-of-00003.bin",
+     "h.8.input_layernorm.bias": "pytorch_model-00002-of-00003.bin",
+     "h.8.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
+     "h.8.mlp.dense_4h_to_h.bias": "pytorch_model-00002-of-00003.bin",
+     "h.8.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00003.bin",
+     "h.8.mlp.dense_h_to_4h.bias": "pytorch_model-00002-of-00003.bin",
+     "h.8.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00003.bin",
+     "h.8.post_attention_layernorm.bias": "pytorch_model-00002-of-00003.bin",
+     "h.8.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
+     "h.8.self_attention.dense.bias": "pytorch_model-00002-of-00003.bin",
+     "h.8.self_attention.dense.weight": "pytorch_model-00002-of-00003.bin",
+     "h.8.self_attention.query_key_value.bias": "pytorch_model-00002-of-00003.bin",
+     "h.8.self_attention.query_key_value.weight": "pytorch_model-00002-of-00003.bin",
+     "h.9.input_layernorm.bias": "pytorch_model-00002-of-00003.bin",
+     "h.9.input_layernorm.weight": "pytorch_model-00002-of-00003.bin",
+     "h.9.mlp.dense_4h_to_h.bias": "pytorch_model-00002-of-00003.bin",
+     "h.9.mlp.dense_4h_to_h.weight": "pytorch_model-00002-of-00003.bin",
+     "h.9.mlp.dense_h_to_4h.bias": "pytorch_model-00002-of-00003.bin",
+     "h.9.mlp.dense_h_to_4h.weight": "pytorch_model-00002-of-00003.bin",
+     "h.9.post_attention_layernorm.bias": "pytorch_model-00002-of-00003.bin",
+     "h.9.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin",
+     "h.9.self_attention.dense.bias": "pytorch_model-00002-of-00003.bin",
+     "h.9.self_attention.dense.weight": "pytorch_model-00002-of-00003.bin",
+     "h.9.self_attention.query_key_value.bias": "pytorch_model-00002-of-00003.bin",
+     "h.9.self_attention.query_key_value.weight": "pytorch_model-00002-of-00003.bin",
+     "ln_f.bias": "pytorch_model-00003-of-00003.bin",
+     "ln_f.weight": "pytorch_model-00003-of-00003.bin",
+     "word_embeddings.weight": "pytorch_model-00001-of-00003.bin",
+     "word_embeddings_layernorm.bias": "pytorch_model-00001-of-00003.bin",
+     "word_embeddings_layernorm.weight": "pytorch_model-00001-of-00003.bin"
+   }
+ }
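The index maps every tensor name to one of the three shards so loaders can open only the files they need. A sketch summarizing how the weight map distributes tensors:

```python
# Sketch: summarize how the weight map distributes tensors over the three shards
# and echo the declared total size (~28.3 GB of float32 weights).
import json
from collections import Counter

with open("pytorch_model.bin.index.json") as f:
    index = json.load(f)

for shard, n in sorted(Counter(index["weight_map"].values()).items()):
    print(shard, n, "tensors")
print("total_size:", index["metadata"]["total_size"], "bytes")
```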
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
+ {
+   "max_seq_length": 300,
+   "do_lower_case": false
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "bos_token": "<s>",
+   "eos_token": "</s>",
+   "pad_token": "<pad>",
+   "unk_token": "<unk>"
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:53b546fa3c6b32936b8e411c6b0c98c66fa272b0e69d1bb025b0026b1908e930
+ size 14500905
tokenizer_config.json ADDED
@@ -0,0 +1,11 @@
+ {
+   "add_prefix_space": false,
+   "bos_token": "<s>",
+   "eos_token": "</s>",
+   "name_or_path": "/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/6b3/bloom-7b1",
+   "pad_token": "<pad>",
+   "padding_side": "left",
+   "special_tokens_map_file": null,
+   "tokenizer_class": "BloomTokenizer",
+   "unk_token": "<unk>"
+ }
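The `"padding_side": "left"` setting is deliberate for a causal model with position-weighted pooling: padding goes before the text, so the real tokens occupy the final (highest-weight) positions. A sketch showing the effect (`{MODEL_NAME}` is again the README's placeholder):

```python
# Sketch: left padding in action. padding_side comes from the tokenizer_config
# above, and the pad token id is 3 per config.json.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained('{MODEL_NAME}')
batch = tok(["short", "a somewhat longer input"], padding=True, return_tensors="pt")
print(batch["input_ids"][0])       # pad ids appear at the start, not the end
print(batch["attention_mask"][0])  # zeros on the left mark the padding
```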