Better model with bs=256

Browse files

Files changed (9) hide show

README.md +1 -1
config.json +1 -1
config_sentence_transformers.json +2 -2
eval/beir.json +276 -0
pytorch_model.bin → pytorch_model-00001-of-00002.bin +2 -2
pytorch_model-00002-of-00002.bin +3 -0
pytorch_model.bin.index.json +3 -0
tokenizer.json +0 -0
tokenizer_config.json +1 -1

README.md CHANGED Viewed

@@ -14,7 +14,7 @@ For usage instructions, refer to our codebase: https://github.com/Muennighoff/sg
 ## Evaluation Results
-For eval results, refer to our paper: https://arxiv.org/abs/2202.08904
 ## Training
 The model was trained with the parameters:

 ## Evaluation Results
+For eval results, refer to the eval folder or our paper: https://arxiv.org/abs/2202.08904
 ## Training
 The model was trained with the parameters:

config.json CHANGED Viewed

@@ -75,7 +75,7 @@
   },
   "tokenizer_class": "GPT2Tokenizer",
   "torch_dtype": "float32",
-  "transformers_version": "4.11.3",
   "use_cache": true,
   "vocab_size": 50259,
   "window_size": 256

   },
   "tokenizer_class": "GPT2Tokenizer",
   "torch_dtype": "float32",
+  "transformers_version": "4.20.0.dev0",
   "use_cache": true,
   "vocab_size": 50259,
   "window_size": 256

config_sentence_transformers.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "__version__": {
     "sentence_transformers": "2.1.0",
-    "transformers": "4.11.3",
-    "pytorch": "1.10.1"
   }
 }

 {
   "__version__": {
     "sentence_transformers": "2.1.0",
+    "transformers": "4.20.0.dev0",
+    "pytorch": "1.10.2"
   }
 }

eval/beir.json ADDED Viewed

	@@ -0,0 +1,276 @@

+{
+    "SGPT-2.7B-weightedmean-msmarco-specb-bitfit": {
+        "cqadupstack_android": {
+            "NDCG@1": 0.34049,
+            "NDCG@3": 0.37556,
+            "NDCG@5": 0.39905,
+            "NDCG@10": 0.42857,
+            "NDCG@100": 0.48229,
+            "NDCG@1000": 0.50638
+        },
+        "cqadupstack_english": {
+            "NDCG@1": 0.32548,
+            "NDCG@3": 0.37526,
+            "NDCG@5": 0.39115,
+            "NDCG@10": 0.41469,
+            "NDCG@100": 0.45826,
+            "NDCG@1000": 0.47974
+        },
+        "cqadupstack_gaming": {
+            "NDCG@1": 0.3906,
+            "NDCG@3": 0.45969,
+            "NDCG@5": 0.4835,
+            "NDCG@10": 0.5126,
+            "NDCG@100": 0.55479,
+            "NDCG@1000": 0.56948
+        },
+        "cqadupstack_gis": {
+            "NDCG@1": 0.23616,
+            "NDCG@3": 0.28729,
+            "NDCG@5": 0.30564,
+            "NDCG@10": 0.33284,
+            "NDCG@100": 0.38248,
+            "NDCG@1000": 0.40625
+        },
+        "cqadupstack_mathematica": {
+            "NDCG@1": 0.14428,
+            "NDCG@3": 0.19214,
+            "NDCG@5": 0.21163,
+            "NDCG@10": 0.24038,
+            "NDCG@100": 0.29953,
+            "NDCG@1000": 0.33115
+        },
+        "cqadupstack_physics": {
+            "NDCG@1": 0.30029,
+            "NDCG@3": 0.35534,
+            "NDCG@5": 0.37864,
+            "NDCG@10": 0.40593,
+            "NDCG@100": 0.46298,
+            "NDCG@1000": 0.48534
+        },
+        "cqadupstack_programmers": {
+            "NDCG@1": 0.28425,
+            "NDCG@3": 0.32215,
+            "NDCG@5": 0.34139,
+            "NDCG@10": 0.37059,
+            "NDCG@100": 0.42629,
+            "NDCG@1000": 0.45306
+        },
+        "cqadupstack_stats": {
+            "NDCG@1": 0.20552,
+            "NDCG@3": 0.2467,
+            "NDCG@5": 0.2676,
+            "NDCG@10": 0.28543,
+            "NDCG@100": 0.33504,
+            "NDCG@1000": 0.36273
+        },
+        "cqadupstack_wordpress": {
+            "NDCG@1": 0.20148,
+            "NDCG@3": 0.24132,
+            "NDCG@5": 0.2599,
+            "NDCG@10": 0.28437,
+            "NDCG@100": 0.33323,
+            "NDCG@1000": 0.36257
+        },
+        "cqadupstack_webmasters": {
+            "NDCG@1": 0.25296,
+            "NDCG@3": 0.31291,
+            "NDCG@5": 0.32524,
+            "NDCG@10": 0.35099,
+            "NDCG@100": 0.40592,
+            "NDCG@1000": 0.43605
+        },
+        "cqadupstack_unix": {
+            "NDCG@1": 0.24627,
+            "NDCG@3": 0.28856,
+            "NDCG@5": 0.30818,
+            "NDCG@10": 0.33186,
+            "NDCG@100": 0.38704,
+            "NDCG@1000": 0.41468
+        },
+        "cqadupstack_tex": {
+            "NDCG@1": 0.16999,
+            "NDCG@3": 0.19658,
+            "NDCG@5": 0.21547,
+            "NDCG@10": 0.23556,
+            "NDCG@100": 0.28445,
+            "NDCG@1000": 0.31631
+        },
+        "cqadupstack": {
+            "NDCG@1": 0.2581475,
+            "NDCG@3": 0.30445833333333333,
+            "NDCG@5": 0.3239491666666666,
+            "NDCG@10": 0.34948416666666665,
+            "NDCG@100": 0.4010250000000001,
+            "NDCG@1000": 0.42697833333333335
+        },
+        "trec-covid": {
+            "NDCG@1": 0.92,
+            "NDCG@3": 0.87246,
+            "NDCG@5": 0.83239,
+            "NDCG@10": 0.80666,
+            "NDCG@100": 0.57691,
+            "NDCG@1000": 0.48148
+        },
+        "trec-news": {
+            "NDCG@1": 0.50439,
+            "NDCG@3": 0.47263,
+            "NDCG@5": 0.47021,
+            "NDCG@10": 0.4379,
+            "NDCG@100": 0.43861,
+            "NDCG@1000": 0.54047
+        },
+        "signal1m": {
+            "NDCG@1": 0.36598,
+            "NDCG@3": 0.325,
+            "NDCG@5": 0.28387,
+            "NDCG@10": 0.24938,
+            "NDCG@100": 0.26649,
+            "NDCG@1000": 0.32811
+        },
+        "nfcorpus": {
+            "NDCG@1": 0.42879,
+            "NDCG@3": 0.3913,
+            "NDCG@5": 0.3704,
+            "NDCG@10": 0.33859,
+            "NDCG@100": 0.3138,
+            "NDCG@1000": 0.40355
+        },
+        "robust04": {
+            "NDCG@1": 0.59036,
+            "NDCG@3": 0.53574,
+            "NDCG@5": 0.50433,
+            "NDCG@10": 0.44895,
+            "NDCG@100": 0.36797,
+            "NDCG@1000": 0.45528
+        },
+        "average": {
+            "NDCG@1": 0.4584631944444445,
+            "NDCG@3": 0.44786324074074074,
+            "NDCG@5": 0.44920439814814817,
+            "NDCG@10": 0.45288189814814817,
+            "NDCG@100": 0.47065527777777777,
+            "NDCG@1000": 0.5063276851851853
+        },
+        "subaverage": {
+            "NDCG@1": 0.5116781818181819,
+            "NDCG@3": 0.49828636363636364,
+            "NDCG@5": 0.50451,
+            "NDCG@10": 0.5136945454545455,
+            "NDCG@100": 0.5280254545454546,
+            "NDCG@1000": 0.5527736363636363
+        },
+        "subsubaverage": {
+            "NDCG@1": 0.35645,
+            "NDCG@3": 0.377964,
+            "NDCG@5": 0.387838,
+            "NDCG@10": 0.40856200000000004,
+            "NDCG@100": 0.444236,
+            "NDCG@1000": 0.480742
+        },
+        "hotpotqa": {
+            "NDCG@1": 0.64389,
+            "NDCG@3": 0.48987,
+            "NDCG@5": 0.51016,
+            "NDCG@10": 0.52835,
+            "NDCG@100": 0.5585,
+            "NDCG@1000": 0.57493
+        },
+        "fiqa": {
+            "NDCG@1": 0.31019,
+            "NDCG@3": 0.29326,
+            "NDCG@5": 0.30571,
+            "NDCG@10": 0.33282,
+            "NDCG@100": 0.39516,
+            "NDCG@1000": 0.42946
+        },
+        "arguana": {
+            "NDCG@1": 0.2596,
+            "NDCG@3": 0.40018,
+            "NDCG@5": 0.45133,
+            "NDCG@10": 0.50512,
+            "NDCG@100": 0.54867,
+            "NDCG@1000": 0.55109
+        },
+        "climate-fever": {
+            "NDCG@1": 0.23322,
+            "NDCG@3": 0.21506,
+            "NDCG@5": 0.23853,
+            "NDCG@10": 0.27171,
+            "NDCG@100": 0.34051,
+            "NDCG@1000": 0.37522
+        },
+        "scifact": {
+            "NDCG@1": 0.58667,
+            "NDCG@3": 0.6479,
+            "NDCG@5": 0.6728,
+            "NDCG@10": 0.70165,
+            "NDCG@100": 0.7294,
+            "NDCG@1000": 0.73457
+        },
+        "msmarco": {
+            "NDCG@1": 0.21203,
+            "NDCG@3": 0.31221,
+            "NDCG@5": 0.3507,
+            "NDCG@10": 0.38832,
+            "NDCG@100": 0.44741,
+            "NDCG@1000": 0.46137
+        },
+        "webis-touche2020": {
+            "NDCG@1": 0.35714,
+            "NDCG@3": 0.26146,
+            "NDCG@5": 0.24908,
+            "NDCG@10": 0.23519,
+            "NDCG@100": 0.35356,
+            "NDCG@1000": 0.46504
+        },
+        "quora": {
+            "NDCG@1": 0.777,
+            "NDCG@3": 0.82276,
+            "NDCG@5": 0.84034,
+            "NDCG@10": 0.85592,
+            "NDCG@100": 0.87054,
+            "NDCG@1000": 0.87235
+        },
+        "scidocs": {
+            "NDCG@1": 0.197,
+            "NDCG@3": 0.15718,
+            "NDCG@5": 0.13895,
+            "NDCG@10": 0.16463,
+            "NDCG@100": 0.23415,
+            "NDCG@1000": 0.28504
+        },
+        "fever": {
+            "NDCG@1": 0.61446,
+            "NDCG@3": 0.69113,
+            "NDCG@5": 0.71169,
+            "NDCG@10": 0.72753,
+            "NDCG@100": 0.74513,
+            "NDCG@1000": 0.74954
+        },
+        "dbpedia-entity": {
+            "NDCG@1": 0.4975,
+            "NDCG@3": 0.39577,
+            "NDCG@5": 0.36718,
+            "NDCG@10": 0.3471,
+            "NDCG@100": 0.3761,
+            "NDCG@1000": 0.44328
+        },
+        "bioasq": {
+            "NDCG@1": 0.416,
+            "NDCG@3": 0.39386,
+            "NDCG@5": 0.38583,
+            "NDCG@10": 0.38394,
+            "NDCG@100": 0.4388,
+            "NDCG@1000": 0.46983
+        },
+        "nq": {
+            "NDCG@1": 0.292,
+            "NDCG@3": 0.39152,
+            "NDCG@5": 0.42893,
+            "NDCG@10": 0.46695,
+            "NDCG@100": 0.51647,
+            "NDCG@1000": 0.52768
+        }
+    }
+}

pytorch_model.bin → pytorch_model-00001-of-00002.bin RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3438cf9b193cdbfaa90926b46d3b892d5eca30a7c8841769e0001375cd5f8bfa
-size 10739644329

 version https://git-lfs.github.com/spec/v1
+oid sha256:4e3447afc0e22ec32f948f4003fb987b04377216f0ce903359f065002e10bc24
+size 9996985557

pytorch_model-00002-of-00002.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:199884372221b5bfdb95e93aca93755269fbbe9a34f50a5c2ae2579a4f00238f
+size 742637183

pytorch_model.bin.index.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2d41a5c9da0d7efc22fb98f1475b73e1b1100eeebe7bfa53d097369c00ea82e2
+size 32846

tokenizer.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json CHANGED Viewed

@@ -1 +1 @@

- {"unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "model_max_length": 2048, "special_tokens_map_file": null, "name_or_path": "EleutherAI/gpt-neo-2.7B", "errors": "replace", "tokenizer_class": "GPT2Tokenizer"}

+ {"unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "model_max_length": 2048, "special_tokens_map_file": null, "name_or_path": "EleutherAI/gpt-neo-2.7B", "errors": "replace", "pad_token": null, "add_bos_token": false, "tokenizer_class": "GPT2Tokenizer"}