Muennighoff committed
Commit 74d3f8d
1 Parent(s): ed6e7ab

Better model with bs=1024

README.md CHANGED
@@ -14,7 +14,7 @@ For usage instructions, refer to our codebase: https://github.com/Muennighoff/sg
 
 ## Evaluation Results
 
-For eval results, refer to our paper: https://arxiv.org/abs/2202.08904
+For eval results, refer to the eval folder or our paper: https://arxiv.org/abs/2202.08904
 
 ## Training
 The model was trained with the parameters:
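Context for the README pointer: loading this checkpoint for evaluation is a one-liner via sentence-transformers. A minimal sketch, assuming the Hub model id `Muennighoff/SGPT-2.7B-weightedmean-nli-bitfit` (inferred from the eval file names in this commit, not stated in the diff):

```python
from sentence_transformers import SentenceTransformer

# Assumed model id, inferred from the eval file names in this commit.
model = SentenceTransformer("Muennighoff/SGPT-2.7B-weightedmean-nli-bitfit")

# Encode sentences into dense vectors; the weighted-mean pooling is part
# of the saved sentence-transformers configuration.
embeddings = model.encode(["Hello world", "Hi there"])
print(embeddings.shape)  # (2, hidden_size)
```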
config.json CHANGED
@@ -75,7 +75,7 @@
   },
   "tokenizer_class": "GPT2Tokenizer",
   "torch_dtype": "float32",
-  "transformers_version": "4.11.3",
+  "transformers_version": "4.20.0.dev0",
   "use_cache": true,
   "vocab_size": 50257,
   "window_size": 256
config_sentence_transformers.json CHANGED
@@ -1,7 +1,7 @@
 {
   "__version__": {
     "sentence_transformers": "2.1.0",
-    "transformers": "4.11.3",
-    "pytorch": "1.10.1"
+    "transformers": "4.20.0.dev0",
+    "pytorch": "1.10.2"
   }
 }
eval/SGPT-2.7B-weightedmean-nli-bitfit_weighted_mean_layer-1_results_average_precision.json ADDED
@@ -0,0 +1,7 @@
+{
+    "askubuntu": 57.48,
+    "cqadupstack": 14.04,
+    "twitterpara": 73.4,
+    "scidocs": 74.69,
+    "avg": 54.9025
+}
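The `avg` field is the unweighted mean of the four task scores, which is easy to verify:

```python
# Reproduce the stored "avg" from the four task-level scores above.
scores = {"askubuntu": 57.48, "cqadupstack": 14.04,
          "twitterpara": 73.4, "scidocs": 74.69}
print(sum(scores.values()) / len(scores))  # 54.9025
```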
eval/SGPT-2.7B-weightedmean-nli-bitfit_weighted_mean_layer-1_results_detailed.json ADDED
@@ -0,0 +1,66 @@
+{
+    "askubuntu": {
+        "map_askubuntu_title": 57.48,
+        "p@1_askubuntu_title": 56.99,
+        "p@5_askubuntu_title": 43.23,
+        "mrr_askubuntu_title": 70.74
+    },
+    "cqadupstack": {
+        "map@100_cqadupstack_unix": 14.82,
+        "ndcg@10_cqadupstack_unix": 16.39,
+        "map@100_cqadupstack_gaming": 26.14,
+        "ndcg@10_cqadupstack_gaming": 28.7,
+        "map@100_cqadupstack_wordpress": 4.64,
+        "ndcg@10_cqadupstack_wordpress": 5.88,
+        "map@100_cqadupstack_stats": 15.42,
+        "ndcg@10_cqadupstack_stats": 16.15,
+        "map@100_cqadupstack_tex": 8.28,
+        "ndcg@10_cqadupstack_tex": 8.96,
+        "map@100_cqadupstack_english": 15.02,
+        "ndcg@10_cqadupstack_english": 16.54,
+        "map@100_cqadupstack_programmers": 13.27,
+        "ndcg@10_cqadupstack_programmers": 14.41,
+        "map@100_cqadupstack_mathematica": 11.74,
+        "ndcg@10_cqadupstack_mathematica": 13.47,
+        "map@100_cqadupstack_physics": 16.81,
+        "ndcg@10_cqadupstack_physics": 18.61,
+        "map@100_cqadupstack_gis": 15.47,
+        "ndcg@10_cqadupstack_gis": 16.67,
+        "map@100_cqadupstack_webmasters": 9.72,
+        "ndcg@10_cqadupstack_webmasters": 10.48,
+        "map@100_cqadupstack_android": 17.12,
+        "ndcg@10_cqadupstack_android": 19.1,
+        "map@100_cqadupstack_avg": 14.04,
+        "ndcg@10_cqadupstack_avg": 15.45
+    },
+    "twitterpara": {
+        "ap_twitter_twitterurl": 75.84,
+        "spearman_twitter_twitterurl": 70.81,
+        "ap_twitter_pit": 70.96,
+        "spearman_twitter_pit": 56.64,
+        "ap_twitter_avg": 73.4,
+        "spearman_twitter_avg": 63.73
+    },
+    "scidocs": {
+        "map_scidocs_cite_euclidean": 72.29,
+        "ndcg_scidocs_cite_euclidean": 86.43,
+        "map_scidocs_cite_cosine": 72.29,
+        "ndcg_scidocs_cite_cosine": 86.43,
+        "map_scidocs_cocite_euclidean": 75.36,
+        "ndcg_scidocs_cocite_euclidean": 88.17,
+        "map_scidocs_cocite_cosine": 75.36,
+        "ndcg_scidocs_cocite_cosine": 88.17,
+        "map_scidocs_coview_euclidean": 76.46,
+        "ndcg_scidocs_coview_euclidean": 87.8,
+        "map_scidocs_coview_cosine": 76.46,
+        "ndcg_scidocs_coview_cosine": 87.8,
+        "map_scidocs_coread_euclidean": 74.65,
+        "ndcg_scidocs_coread_euclidean": 87.0,
+        "map_scidocs_coread_cosine": 74.65,
+        "ndcg_scidocs_coread_cosine": 87.0,
+        "map_scidocs_euclidean_avg": 74.69,
+        "ndcg_scidocs_euclidean_avg": 87.35,
+        "map_scidocs_cosine_avg": 74.69,
+        "ndcg_scidocs_cosine_avg": 87.35
+    }
+}
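The `*_avg` entries are likewise plain means over the per-subforum metrics; for instance, `map@100_cqadupstack_avg` can be recomputed from this file directly:

```python
import json

# Read the detailed results file added in this commit.
with open("eval/SGPT-2.7B-weightedmean-nli-bitfit_weighted_mean_layer-1_results_detailed.json") as f:
    detailed = json.load(f)

# Average the twelve per-subforum MAP@100 values, excluding the stored avg.
maps = [v for k, v in detailed["cqadupstack"].items()
        if k.startswith("map@100") and not k.endswith("_avg")]
print(sum(maps) / len(maps))  # ≈ 14.04, matching map@100_cqadupstack_avg
```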
eval/quora.json ADDED
@@ -0,0 +1 @@
+{"SGPT-2.7B-weightedmean-nli-bitfit": {"quora": {"NDCG@1": 0.7461, "NDCG@3": 0.79099, "NDCG@5": 0.80989, "NDCG@10": 0.82645, "NDCG@100": 0.84542, "NDCG@1000": 0.8478}}}
eval/similarity_evaluation_sts-dev_results.csv CHANGED
@@ -1,12 +1,12 @@
 epoch,steps,cosine_pearson,cosine_spearman,euclidean_pearson,euclidean_spearman,manhattan_pearson,manhattan_spearman,dot_pearson,dot_spearman
-0,7045,0.854762523109346,0.8625722411777832,0.8559952110984044,0.8599071798553728,0.8562325179051519,0.8607011270217957,0.7604255385773144,0.7606474726533847
-0,14090,0.862071545726209,0.8686263495658821,0.8587162825998957,0.8617412291621442,0.8592342866823668,0.8627125269846087,0.782391853949178,0.7829935432149258
-0,21135,0.861147560133928,0.8673147899430655,0.8565176946586014,0.859934895176966,0.8562610749976107,0.8599692261614817,0.767074455302903,0.7695279716565803
-0,28180,0.8633257964426606,0.8706412335168616,0.8552663992944554,0.8599920464619075,0.8540030273419198,0.8592715940080606,0.7791548165962676,0.7857437212669921
-0,35225,0.8571401068053901,0.8634311296708672,0.8509601042154958,0.8549359573084144,0.8507577553572114,0.8553929614415839,0.7642286538174881,0.770790070391215
-0,42270,0.8598605654357643,0.8650105143335562,0.8511585774583015,0.8554589632664334,0.8505706583139645,0.8553814756685433,0.7617624805084298,0.7702625967180601
-0,49315,0.8573338952704139,0.8609427481847676,0.842483574189439,0.8467522455218017,0.8416057486283591,0.8466172561049204,0.7626709741843098,0.772898390349653
-0,56360,0.8621834500026332,0.8670699694896324,0.8505378356088411,0.8545156885949555,0.8496468004499398,0.854239264916894,0.7677057311830797,0.7762473321595961
-0,63405,0.8591097675730657,0.8630995653851116,0.8446734640853417,0.8496054770256773,0.8437993624528112,0.8492440088050155,0.7627356339474485,0.7727980219530748
-0,70450,0.8598307992062442,0.864221237988858,0.84629527410727,0.8513689337571044,0.845389605407747,0.8508766807487939,0.7613761802820678,0.7708455277248111
-0,-1,0.8598065664641443,0.8642014942030082,0.8462916558850146,0.8513545689785207,0.8453849685801835,0.8508523604473343,0.7613656467412337,0.7707829013471281
+0,440,0.8547642811744708,0.8604042254467369,0.8564294919751527,0.8580900991676403,0.8592109600985026,0.8606819249266364,0.7727095196030622,0.7702266970220912
+0,880,0.8605577862452674,0.8664787776815549,0.8594251818914206,0.8613580463896765,0.8625107129203857,0.8646695024116025,0.7777588243182069,0.7747923460803676
+0,1320,0.8641797068951016,0.8701385809189678,0.8600269510402324,0.8623718727121046,0.8630083477192438,0.8655171844912587,0.7819169473167653,0.7801439440796124
+0,1760,0.863861676991937,0.8699711768888497,0.8610538565702486,0.8639273844617363,0.8638166020097834,0.8668464202840234,0.7762041862089968,0.7743392283299438
+0,2200,0.8667562252351253,0.8722428457163393,0.8609922973987619,0.8640152137038429,0.8637144738269167,0.866880911411029,0.7797478785593531,0.7772643228752733
+0,2640,0.8658548494423817,0.8702345916613825,0.8583041242377912,0.8613687812725296,0.8606700693927242,0.8638960499205391,0.7781537908033099,0.7761412829543439
+0,3080,0.8643810926871549,0.8692556763950754,0.857059374227981,0.8600999939200575,0.8594169577592663,0.8625821753483399,0.7756975962910497,0.7737551514144106
+0,3520,0.8661238263202532,0.8701186430665476,0.8576723473616406,0.8614191549727733,0.8600335298933423,0.8639555339473548,0.777608544440925,0.7758958610767906
+0,3960,0.8659908588458113,0.8699200451809654,0.8570869831042444,0.860628569017929,0.8594235992474281,0.8631523750300969,0.7746604455122261,0.7731850887434243
+0,4400,0.8662536580670237,0.8702266564863804,0.8571446774934243,0.8608170966035958,0.8594735292127258,0.8633222114462352,0.7763396949906898,0.7751198872316742
+0,-1,0.8662583413659,0.8702579991508459,0.8571471752177104,0.8608379873119169,0.8594701305965878,0.8633306420570356,0.7763449731290442,0.7751015402439239
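The evaluation interval drops from every 7045 steps to every 440, consistent with the commit message: roughly 16× fewer optimizer steps per epoch, which is what a ~16× larger batch would produce (1024 vs. a presumed earlier 64; the old batch size is an inference, not stated in this diff). A quick check:

```python
# Ratio of old to new evaluation intervals; ~16x fewer steps per epoch
# matches a 64 -> 1024 batch-size jump (1024 / 64 = 16).
print(7045 / 440)  # ≈ 16.01
```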
pytorch_model.bin → pytorch_model-00001-of-00002.bin RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:97d3fa68460291ddf0b431b9cf0beb67ea22dac2a356a732447e7f74388ce61b
-size 10739623849
+oid sha256:3328d67e21cfc12696ed04be3b111723b5d0e712ecc53de8cacf360745de42c5
+size 9996965077
pytorch_model-00002-of-00002.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:020dafb349b69af0f5e674afb6950199a2b705093f1a618500e9a55809240b52
+size 742637183
pytorch_model.bin.index.json ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1faacf954a349388fab25a3b20e1f0a1e09d87d1eb840569b3af5e4333b3785a
+size 32846
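With this commit the checkpoint is sharded: two `pytorch_model-*.bin` files plus `pytorch_model.bin.index.json`, which maps each parameter name to its shard. `transformers` resolves the index automatically on load; a minimal sketch, again assuming the Hub model id inferred above:

```python
from transformers import AutoModel

# from_pretrained reads pytorch_model.bin.index.json and loads both shards;
# the model id is the assumed one from this commit's file names.
model = AutoModel.from_pretrained("Muennighoff/SGPT-2.7B-weightedmean-nli-bitfit")
print(sum(p.numel() for p in model.parameters()))  # ~2.7B parameters
```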
similarity_evaluation_sts-test_results.csv DELETED
@@ -1,2 +0,0 @@
1
- epoch,steps,cosine_pearson,cosine_spearman,euclidean_pearson,euclidean_spearman,manhattan_pearson,manhattan_spearman,dot_pearson,dot_spearman
2
- -1,-1,0.8421195813886659,0.8544705934133663,0.8331975612681435,0.8365254010816424,0.8308796371031103,0.8350842363910842,0.7416086337173262,0.730738158975577
 
 
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
tokenizer_config.json CHANGED
@@ -1 +1 @@
-{"unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "model_max_length": 2048, "special_tokens_map_file": null, "name_or_path": "EleutherAI/gpt-neo-2.7B", "errors": "replace", "tokenizer_class": "GPT2Tokenizer"}
+{"unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "model_max_length": 2048, "special_tokens_map_file": null, "name_or_path": "EleutherAI/gpt-neo-2.7B", "errors": "replace", "pad_token": null, "add_bos_token": false, "tokenizer_class": "GPT2Tokenizer"}