Muennighoff committed
Commit 74d3f8d
1 Parent(s): ed6e7ab

Better model with bs=1024

README.md CHANGED
@@ -14,7 +14,7 @@ For usage instructions, refer to our codebase: https://github.com/Muennighoff/sg
 
 ## Evaluation Results
 
-For eval results, refer to our paper: https://arxiv.org/abs/2202.08904
+For eval results, refer to the eval folder or our paper: https://arxiv.org/abs/2202.08904
 
 ## Training
 The model was trained with the parameters:
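Context for the README pointer: loading this checkpoint for evaluation is a one-liner via sentence-transformers. A minimal sketch, assuming the Hub model id `Muennighoff/SGPT-2.7B-weightedmean-nli-bitfit` (inferred from the eval file names in this commit, not stated in the diff):

```python
from sentence_transformers import SentenceTransformer

# Assumed model id, inferred from the eval file names in this commit.
model = SentenceTransformer("Muennighoff/SGPT-2.7B-weightedmean-nli-bitfit")

# Encode sentences into dense vectors; the weighted-mean pooling is part
# of the saved sentence-transformers configuration.
embeddings = model.encode(["Hello world", "Hi there"])
print(embeddings.shape)  # (2, hidden_size)
```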
config.json CHANGED
@@ -75,7 +75,7 @@
   },
   "tokenizer_class": "GPT2Tokenizer",
   "torch_dtype": "float32",
-  "transformers_version": "4.11.3",
+  "transformers_version": "4.20.0.dev0",
   "use_cache": true,
   "vocab_size": 50257,
   "window_size": 256
config_sentence_transformers.json CHANGED
@@ -1,7 +1,7 @@
 {
   "__version__": {
     "sentence_transformers": "2.1.0",
-    "transformers": "4.11.3",
-    "pytorch": "1.10.1"
+    "transformers": "4.20.0.dev0",
+    "pytorch": "1.10.2"
   }
 }
eval/SGPT-2.7B-weightedmean-nli-bitfit_weighted_mean_layer-1_results_average_precision.json ADDED
@@ -0,0 +1,7 @@
+{
+    "askubuntu": 57.48,
+    "cqadupstack": 14.04,
+    "twitterpara": 73.4,
+    "scidocs": 74.69,
+    "avg": 54.9025
+}
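The `avg` field is the unweighted mean of the four task scores, which is easy to verify:

```python
# Reproduce the stored "avg" from the four task-level scores above.
scores = {"askubuntu": 57.48, "cqadupstack": 14.04,
          "twitterpara": 73.4, "scidocs": 74.69}
print(sum(scores.values()) / len(scores))  # 54.9025
```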
eval/SGPT-2.7B-weightedmean-nli-bitfit_weighted_mean_layer-1_results_detailed.json ADDED
@@ -0,0 +1,66 @@
+{
+    "askubuntu": {
+        "map_askubuntu_title": 57.48,
+        "p@1_askubuntu_title": 56.99,
+        "p@5_askubuntu_title": 43.23,
+        "mrr_askubuntu_title": 70.74
+    },
+    "cqadupstack": {
+        "map@100_cqadupstack_unix": 14.82,
+        "ndcg@10_cqadupstack_unix": 16.39,
+        "map@100_cqadupstack_gaming": 26.14,
+        "ndcg@10_cqadupstack_gaming": 28.7,
+        "map@100_cqadupstack_wordpress": 4.64,
+        "ndcg@10_cqadupstack_wordpress": 5.88,
+        "map@100_cqadupstack_stats": 15.42,
+        "ndcg@10_cqadupstack_stats": 16.15,
+        "map@100_cqadupstack_tex": 8.28,
+        "ndcg@10_cqadupstack_tex": 8.96,
+        "map@100_cqadupstack_english": 15.02,
+        "ndcg@10_cqadupstack_english": 16.54,
+        "map@100_cqadupstack_programmers": 13.27,
+        "ndcg@10_cqadupstack_programmers": 14.41,
+        "map@100_cqadupstack_mathematica": 11.74,
+        "ndcg@10_cqadupstack_mathematica": 13.47,
+        "map@100_cqadupstack_physics": 16.81,
+        "ndcg@10_cqadupstack_physics": 18.61,
+        "map@100_cqadupstack_gis": 15.47,
+        "ndcg@10_cqadupstack_gis": 16.67,
+        "map@100_cqadupstack_webmasters": 9.72,
+        "ndcg@10_cqadupstack_webmasters": 10.48,
+        "map@100_cqadupstack_android": 17.12,
+        "ndcg@10_cqadupstack_android": 19.1,
+        "map@100_cqadupstack_avg": 14.04,
+        "ndcg@10_cqadupstack_avg": 15.45
+    },
+    "twitterpara": {
+        "ap_twitter_twitterurl": 75.84,
+        "spearman_twitter_twitterurl": 70.81,
+        "ap_twitter_pit": 70.96,
+        "spearman_twitter_pit": 56.64,
+        "ap_twitter_avg": 73.4,
+        "spearman_twitter_avg": 63.73
+    },
+    "scidocs": {
+        "map_scidocs_cite_euclidean": 72.29,
+        "ndcg_scidocs_cite_euclidean": 86.43,
+        "map_scidocs_cite_cosine": 72.29,
+        "ndcg_scidocs_cite_cosine": 86.43,
+        "map_scidocs_cocite_euclidean": 75.36,
+        "ndcg_scidocs_cocite_euclidean": 88.17,
+        "map_scidocs_cocite_cosine": 75.36,
+        "ndcg_scidocs_cocite_cosine": 88.17,
+        "map_scidocs_coview_euclidean": 76.46,
+        "ndcg_scidocs_coview_euclidean": 87.8,
+        "map_scidocs_coview_cosine": 76.46,
+        "ndcg_scidocs_coview_cosine": 87.8,
+        "map_scidocs_coread_euclidean": 74.65,
+        "ndcg_scidocs_coread_euclidean": 87.0,
+        "map_scidocs_coread_cosine": 74.65,
+        "ndcg_scidocs_coread_cosine": 87.0,
+        "map_scidocs_euclidean_avg": 74.69,
+        "ndcg_scidocs_euclidean_avg": 87.35,
+        "map_scidocs_cosine_avg": 74.69,
+        "ndcg_scidocs_cosine_avg": 87.35
+    }
+}
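The `*_avg` entries are likewise plain means over the per-subforum metrics; for instance, `map@100_cqadupstack_avg` can be recomputed from this file directly:

```python
import json

# Read the detailed results file added in this commit.
with open("eval/SGPT-2.7B-weightedmean-nli-bitfit_weighted_mean_layer-1_results_detailed.json") as f:
    detailed = json.load(f)

# Average the twelve per-subforum MAP@100 values, excluding the stored avg.
maps = [v for k, v in detailed["cqadupstack"].items()
        if k.startswith("map@100") and not k.endswith("_avg")]
print(sum(maps) / len(maps))  # ≈ 14.04, matching map@100_cqadupstack_avg
```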
eval/quora.json ADDED
@@ -0,0 +1 @@
+{"SGPT-2.7B-weightedmean-nli-bitfit": {"quora": {"NDCG@1": 0.7461, "NDCG@3": 0.79099, "NDCG@5": 0.80989, "NDCG@10": 0.82645, "NDCG@100": 0.84542, "NDCG@1000": 0.8478}}}
eval/similarity_evaluation_sts-dev_results.csv CHANGED
@@ -1,12 +1,12 @@
 epoch,steps,cosine_pearson,cosine_spearman,euclidean_pearson,euclidean_spearman,manhattan_pearson,manhattan_spearman,dot_pearson,dot_spearman
-0,7045,0.854762523109346,0.8625722411777832,0.8559952110984044,0.8599071798553728,0.8562325179051519,0.8607011270217957,0.7604255385773144,0.7606474726533847
-0,14090,0.862071545726209,0.8686263495658821,0.8587162825998957,0.8617412291621442,0.8592342866823668,0.8627125269846087,0.782391853949178,0.7829935432149258
-0,21135,0.861147560133928,0.8673147899430655,0.8565176946586014,0.859934895176966,0.8562610749976107,0.8599692261614817,0.767074455302903,0.7695279716565803
-0,28180,0.8633257964426606,0.8706412335168616,0.8552663992944554,0.8599920464619075,0.8540030273419198,0.8592715940080606,0.7791548165962676,0.7857437212669921
-0,35225,0.8571401068053901,0.8634311296708672,0.8509601042154958,0.8549359573084144,0.8507577553572114,0.8553929614415839,0.7642286538174881,0.770790070391215
-0,42270,0.8598605654357643,0.8650105143335562,0.8511585774583015,0.8554589632664334,0.8505706583139645,0.8553814756685433,0.7617624805084298,0.7702625967180601
-0,49315,0.8573338952704139,0.8609427481847676,0.842483574189439,0.8467522455218017,0.8416057486283591,0.8466172561049204,0.7626709741843098,0.772898390349653
-0,56360,0.8621834500026332,0.8670699694896324,0.8505378356088411,0.8545156885949555,0.8496468004499398,0.854239264916894,0.7677057311830797,0.7762473321595961
-0,63405,0.8591097675730657,0.8630995653851116,0.8446734640853417,0.8496054770256773,0.8437993624528112,0.8492440088050155,0.7627356339474485,0.7727980219530748
-0,70450,0.8598307992062442,0.864221237988858,0.84629527410727,0.8513689337571044,0.845389605407747,0.8508766807487939,0.7613761802820678,0.7708455277248111
-0,-1,0.8598065664641443,0.8642014942030082,0.8462916558850146,0.8513545689785207,0.8453849685801835,0.8508523604473343,0.7613656467412337,0.7707829013471281
+0,440,0.8547642811744708,0.8604042254467369,0.8564294919751527,0.8580900991676403,0.8592109600985026,0.8606819249266364,0.7727095196030622,0.7702266970220912
+0,880,0.8605577862452674,0.8664787776815549,0.8594251818914206,0.8613580463896765,0.8625107129203857,0.8646695024116025,0.7777588243182069,0.7747923460803676
+0,1320,0.8641797068951016,0.8701385809189678,0.8600269510402324,0.8623718727121046,0.8630083477192438,0.8655171844912587,0.7819169473167653,0.7801439440796124
+0,1760,0.863861676991937,0.8699711768888497,0.8610538565702486,0.8639273844617363,0.8638166020097834,0.8668464202840234,0.7762041862089968,0.7743392283299438
+0,2200,0.8667562252351253,0.8722428457163393,0.8609922973987619,0.8640152137038429,0.8637144738269167,0.866880911411029,0.7797478785593531,0.7772643228752733
+0,2640,0.8658548494423817,0.8702345916613825,0.8583041242377912,0.8613687812725296,0.8606700693927242,0.8638960499205391,0.7781537908033099,0.7761412829543439
+0,3080,0.8643810926871549,0.8692556763950754,0.857059374227981,0.8600999939200575,0.8594169577592663,0.8625821753483399,0.7756975962910497,0.7737551514144106
+0,3520,0.8661238263202532,0.8701186430665476,0.8576723473616406,0.8614191549727733,0.8600335298933423,0.8639555339473548,0.777608544440925,0.7758958610767906
+0,3960,0.8659908588458113,0.8699200451809654,0.8570869831042444,0.860628569017929,0.8594235992474281,0.8631523750300969,0.7746604455122261,0.7731850887434243
+0,4400,0.8662536580670237,0.8702266564863804,0.8571446774934243,0.8608170966035958,0.8594735292127258,0.8633222114462352,0.7763396949906898,0.7751198872316742
+0,-1,0.8662583413659,0.8702579991508459,0.8571471752177104,0.8608379873119169,0.8594701305965878,0.8633306420570356,0.7763449731290442,0.7751015402439239
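The evaluation interval drops from every 7045 steps to every 440, consistent with the commit message: roughly 16× fewer optimizer steps per epoch, which is what a ~16× larger batch would produce (1024 vs. a presumed earlier 64; the old batch size is an inference, not stated in this diff). A quick check:

```python
# Ratio of old to new evaluation intervals; ~16x fewer steps per epoch
# matches a 64 -> 1024 batch-size jump (1024 / 64 = 16).
print(7045 / 440)  # ≈ 16.01
```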
pytorch_model.bin → pytorch_model-00001-of-00002.bin RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:97d3fa68460291ddf0b431b9cf0beb67ea22dac2a356a732447e7f74388ce61b
-size 10739623849
+oid sha256:3328d67e21cfc12696ed04be3b111723b5d0e712ecc53de8cacf360745de42c5
+size 9996965077
pytorch_model-00002-of-00002.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:020dafb349b69af0f5e674afb6950199a2b705093f1a618500e9a55809240b52
+size 742637183
pytorch_model.bin.index.json ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1faacf954a349388fab25a3b20e1f0a1e09d87d1eb840569b3af5e4333b3785a
+size 32846
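With this commit the checkpoint is sharded: two `pytorch_model-*.bin` files plus `pytorch_model.bin.index.json`, which maps each parameter name to its shard. `transformers` resolves the index automatically on load; a minimal sketch, again assuming the Hub model id inferred above:

```python
from transformers import AutoModel

# from_pretrained reads pytorch_model.bin.index.json and loads both shards;
# the model id is the assumed one from this commit's file names.
model = AutoModel.from_pretrained("Muennighoff/SGPT-2.7B-weightedmean-nli-bitfit")
print(sum(p.numel() for p in model.parameters()))  # ~2.7B parameters
```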
similarity_evaluation_sts-test_results.csv DELETED
@@ -1,2 +0,0 @@
1
- epoch,steps,cosine_pearson,cosine_spearman,euclidean_pearson,euclidean_spearman,manhattan_pearson,manhattan_spearman,dot_pearson,dot_spearman
2
- -1,-1,0.8421195813886659,0.8544705934133663,0.8331975612681435,0.8365254010816424,0.8308796371031103,0.8350842363910842,0.7416086337173262,0.730738158975577
 
 
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
tokenizer_config.json CHANGED
@@ -1 +1 @@
-{"unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "model_max_length": 2048, "special_tokens_map_file": null, "name_or_path": "EleutherAI/gpt-neo-2.7B", "errors": "replace", "tokenizer_class": "GPT2Tokenizer"}
+{"unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "model_max_length": 2048, "special_tokens_map_file": null, "name_or_path": "EleutherAI/gpt-neo-2.7B", "errors": "replace", "pad_token": null, "add_bos_token": false, "tokenizer_class": "GPT2Tokenizer"}