Muennighoff committed on
Commit
043ecd9
1 Parent(s): a70a11c

Add sgpt-nli-bloom-1b3

Browse files
.gitattributes CHANGED
@@ -25,3 +25,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
28
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
29
+
1_Pooling/config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 2048,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": false,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": true,
8
+ "pooling_mode_lasttoken": false
9
+ }
README.md ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ pipeline_tag: sentence-similarity
3
+ tags:
4
+ - sentence-transformers
5
+ - feature-extraction
6
+ - sentence-similarity
7
+ ---
8
+
9
+ # sgpt-nli-bloom-1b3
10
+
11
+ This is a [sentence-transformers](https://www.SBERT.net) model: it maps sentences & paragraphs to a 2048-dimensional dense vector space and can be used for tasks like clustering or semantic search.
12
+
13
+ <!--- Describe your model here -->
14
+
15
+ ## Usage (Sentence-Transformers)
16
+
17
+ Using this model becomes easy when you have [sentence-transformers](https://www.SBERT.net) installed:
18
+
19
+ ```
20
+ pip install -U sentence-transformers
21
+ ```
22
+
23
+ Then you can use the model like this:
24
+
25
+ ```python
26
+ from sentence_transformers import SentenceTransformer
27
+ sentences = ["This is an example sentence", "Each sentence is converted"]
28
+
29
+ model = SentenceTransformer('{MODEL_NAME}')
30
+ embeddings = model.encode(sentences)
31
+ print(embeddings)
32
+ ```
33
+
34
+
35
+
36
+ ## Evaluation Results
37
+
38
+ <!--- Describe how your model was evaluated -->
39
+
40
+ For an automated evaluation of this model, see the *Sentence Embeddings Benchmark*: [https://seb.sbert.net](https://seb.sbert.net?model_name={MODEL_NAME})
41
+
42
+
43
+ ## Training
44
+ The model was trained with the parameters:
45
+
46
+ **DataLoader**:
47
+
48
+ `sentence_transformers.datasets.NoDuplicatesDataLoader.NoDuplicatesDataLoader` of length 4403 with parameters:
49
+ ```
50
+ {'batch_size': 128}
51
+ ```
52
+
53
+ **Loss**:
54
+
55
+ `sentence_transformers.losses.MultipleNegativesRankingLoss.MNRLGradCache`
56
+
57
+ Parameters of the `fit()` method:
58
+ ```
59
+ {
60
+ "epochs": 1,
61
+ "evaluation_steps": 440,
62
+ "evaluator": "sentence_transformers.evaluation.EmbeddingSimilarityEvaluator.EmbeddingSimilarityEvaluator",
63
+ "max_grad_norm": 1,
64
+ "optimizer_class": "<class 'transformers.optimization.AdamW'>",
65
+ "optimizer_params": {
66
+ "lr": 0.00032
67
+ },
68
+ "scheduler": "WarmupLinear",
69
+ "steps_per_epoch": null,
70
+ "warmup_steps": 441,
71
+ "weight_decay": 0.01
72
+ }
73
+ ```
74
+
75
+
76
+ ## Full Model Architecture
77
+ ```
78
+ SentenceTransformer(
79
+ (0): Transformer({'max_seq_length': 75, 'do_lower_case': False}) with Transformer model: BloomModel
80
+ (1): Pooling({'word_embedding_dimension': 2048, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': True, 'pooling_mode_lasttoken': False})
81
+ )
82
+ ```
83
+
84
+ ## Citing & Authors
85
+
86
+ <!--- Describe where people can find more information -->
config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/home/opc/sgpt/biencoder/nli_msmarco/sentence-transformers/bloom-1b3",
3
+ "apply_residual_connection_post_layernorm": false,
4
+ "architectures": [
5
+ "BloomModel"
6
+ ],
7
+ "attention_dropout": 0.0,
8
+ "attention_softmax_in_fp32": true,
9
+ "bias_dropout_fusion": true,
10
+ "bos_token_id": 50256,
11
+ "eos_token_id": 50256,
12
+ "hidden_dropout": 0.0,
13
+ "initializer_range": 0.02,
14
+ "layer_norm_epsilon": 1e-05,
15
+ "masked_softmax_fusion": true,
16
+ "model_type": "bloom",
17
+ "n_embed": 2048,
18
+ "n_inner": null,
19
+ "n_layer": 24,
20
+ "num_attention_heads": 16,
21
+ "offset_alibi": 100,
22
+ "pretraining_tp": 2,
23
+ "seq_length": 4096,
24
+ "skip_bias_add": true,
25
+ "skip_bias_add_qkv": false,
26
+ "torch_dtype": "float32",
27
+ "transformers_version": "4.20.0.dev0",
28
+ "use_cache": false,
29
+ "vocab_size": 250880
30
+ }
config_sentence_transformers.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "2.1.0",
4
+ "transformers": "4.20.0.dev0",
5
+ "pytorch": "1.10.2"
6
+ }
7
+ }
eval/similarity_evaluation_sts-dev_results.csv ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ epoch,steps,cosine_pearson,cosine_spearman,euclidean_pearson,euclidean_spearman,manhattan_pearson,manhattan_spearman,dot_pearson,dot_spearman
2
+ 0,440,0.8242313306787753,0.8226813115133954,0.8276069619650719,0.8268759567569194,0.8334862792361277,0.8320821542999498,0.7353693665298873,0.7348598843588485
3
+ 0,880,0.8459843086553267,0.8503815475776895,0.842878294673236,0.8451332415984594,0.8480524942302925,0.8501835422367507,0.7474383889678284,0.7496324023372983
4
+ 0,1320,0.8552344036003329,0.8603697378784526,0.8360007236193618,0.8411717619962727,0.8416522715227013,0.8464700628057116,0.7299126130844387,0.739072574098811
5
+ 0,1760,0.8580401062147619,0.8640839343918373,0.8352734951935745,0.841666317497713,0.8404680775310315,0.8465381160962572,0.7257277248318785,0.7377711461544183
6
+ 0,2200,0.8608343119089805,0.866170556828279,0.8348571349832736,0.841525970425769,0.839211238760418,0.8458434307006681,0.727537285544208,0.7416178694496526
7
+ 0,2640,0.8609322291807961,0.8669794933295952,0.834098925095663,0.8412185342705436,0.8378885926527198,0.8450370545608037,0.7246544989448657,0.7391210367657075
8
+ 0,3080,0.8612335308151773,0.8668760555357917,0.8326572542978878,0.8395897555664348,0.8363905686559461,0.8433466318245757,0.7195257907085693,0.737346286430613
9
+ 0,3520,0.8626062277494557,0.8681258749036436,0.8346575503908169,0.841873317640566,0.8380398063817516,0.8453286972493597,0.7197310734973534,0.7381475503287697
10
+ 0,3960,0.8624141332696987,0.8678907964495984,0.8341648718112045,0.8419911326684907,0.8374000202256104,0.8451632700889332,0.7212668518554104,0.7392373446751543
11
+ 0,4400,0.8618680667686823,0.867442027456624,0.8333086384128641,0.8410478063947154,0.8365619082265696,0.8441108013910831,0.7185411402492852,0.737630633601933
12
+ 0,-1,0.8618705483964846,0.867452539919395,0.8333168833290586,0.8410583081813221,0.8365715498213021,0.8441200754188356,0.7185606597944942,0.73761900725939
modules.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ }
14
+ ]
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e880c3f13ae9a2912d4077266ac000c9611eebcc1123814d4af63a02a7edb72
3
+ size 6889731343
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
1
+ {
2
+ "max_seq_length": 75,
3
+ "do_lower_case": false
4
+ }
sgpt-nli-bloom-1b3_weightedmean_layer-1_results_average_precision.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
1
+ {
2
+ "askubuntu": 57.44,
3
+ "cqadupstack": 14.18,
4
+ "twitterpara": 73.99,
5
+ "scidocs": 74.74,
6
+ "avg": 55.087500000000006
7
+ }
sgpt-nli-bloom-1b3_weightedmean_layer-1_results_detailed.json ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "askubuntu": {
3
+ "map_askubuntu_title": 57.44,
4
+ "p@1_askubuntu_title": 56.45,
5
+ "p@5_askubuntu_title": 42.8,
6
+ "mrr_askubuntu_title": 70.07
7
+ },
8
+ "cqadupstack": {
9
+ "map@100_cqadupstack_unix": 12.58,
10
+ "ndcg@10_cqadupstack_unix": 14.17,
11
+ "map@100_cqadupstack_gaming": 27.68,
12
+ "ndcg@10_cqadupstack_gaming": 30.06,
13
+ "map@100_cqadupstack_wordpress": 7.22,
14
+ "ndcg@10_cqadupstack_wordpress": 8.13,
15
+ "map@100_cqadupstack_stats": 14.86,
16
+ "ndcg@10_cqadupstack_stats": 15.8,
17
+ "map@100_cqadupstack_tex": 8.28,
18
+ "ndcg@10_cqadupstack_tex": 9.28,
19
+ "map@100_cqadupstack_english": 16.19,
20
+ "ndcg@10_cqadupstack_english": 17.94,
21
+ "map@100_cqadupstack_programmers": 12.47,
22
+ "ndcg@10_cqadupstack_programmers": 13.4,
23
+ "map@100_cqadupstack_mathematica": 12.05,
24
+ "ndcg@10_cqadupstack_mathematica": 13.43,
25
+ "map@100_cqadupstack_physics": 16.5,
26
+ "ndcg@10_cqadupstack_physics": 18.28,
27
+ "map@100_cqadupstack_gis": 18.1,
28
+ "ndcg@10_cqadupstack_gis": 19.14,
29
+ "map@100_cqadupstack_webmasters": 9.1,
30
+ "ndcg@10_cqadupstack_webmasters": 10.34,
31
+ "map@100_cqadupstack_android": 15.16,
32
+ "ndcg@10_cqadupstack_android": 17.27,
33
+ "map@100_cqadupstack_avg": 14.18,
34
+ "ndcg@10_cqadupstack_avg": 15.6
35
+ },
36
+ "twitterpara": {
37
+ "ap_twitter_twitterurl": 77.05,
38
+ "spearman_twitter_twitterurl": 71.33,
39
+ "ap_twitter_pit": 70.92,
40
+ "spearman_twitter_pit": 53.43,
41
+ "ap_twitter_avg": 73.99,
42
+ "spearman_twitter_avg": 62.38
43
+ },
44
+ "scidocs": {
45
+ "map_scidocs_cite_euclidean": 72.36,
46
+ "ndcg_scidocs_cite_euclidean": 86.55,
47
+ "map_scidocs_cite_cosine": 72.36,
48
+ "ndcg_scidocs_cite_cosine": 86.55,
49
+ "map_scidocs_cocite_euclidean": 75.37,
50
+ "ndcg_scidocs_cocite_euclidean": 88.13,
51
+ "map_scidocs_cocite_cosine": 75.37,
52
+ "ndcg_scidocs_cocite_cosine": 88.13,
53
+ "map_scidocs_coview_euclidean": 76.56,
54
+ "ndcg_scidocs_coview_euclidean": 87.9,
55
+ "map_scidocs_coview_cosine": 76.56,
56
+ "ndcg_scidocs_coview_cosine": 87.9,
57
+ "map_scidocs_coread_euclidean": 74.67,
58
+ "ndcg_scidocs_coread_euclidean": 87.14,
59
+ "map_scidocs_coread_cosine": 74.67,
60
+ "ndcg_scidocs_coread_cosine": 87.14,
61
+ "map_scidocs_euclidean_avg": 74.74,
62
+ "ndcg_scidocs_euclidean_avg": 87.43,
63
+ "map_scidocs_cosine_avg": 74.74,
64
+ "ndcg_scidocs_cosine_avg": 87.43
65
+ }
66
+ }
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "pad_token": "</s>"}
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ab8d6865d6043895799adce910945c14ad5f54737d618dbefbe85e84d1a3436
3
+ size 14500694
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
1
+ {"unk_token": "<unk>", "eos_token": "</s>", "bos_token": "<s>", "pad_token": "<pad>", "name_or_path": "/home/opc/sgpt/biencoder/nli_msmarco/sentence-transformers/bloom-1b3", "special_tokens_map_file": null, "tokenizer_class": "PreTrainedTokenizerFast"}