#
# Pyserini: Reproducible IR research with sparse and dense representations
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Registry of pre-encoded query sets, keyed by query-set name.
#
# Every entry is a dict with the same six fields:
#   "description"   - human-readable summary of the query set and encoder.
#   "urls"          - list of download locations (each entry lists one .tar.gz).
#   "md5"           - 32-character hex digest; presumably the MD5 of the
#                     archive -- verify against the code that consumes it.
#   "size (bytes)"  - integer; presumably the archive size in bytes.
#   "total_queries" - integer number of queries in the set.
#   "downloaded"    - initialised to False for every entry; presumably updated
#                     by download code elsewhere -- TODO confirm.
#
# NOTE(review): some 20230124 archives end in "-99b79" (ance-*, distilbert_kd-*)
# while others end in "-99b795" (tct_colbert-v2-hnp-*, distilbert_tas_b-*).
# URLs are left exactly as they were; confirm against the hosted file names.
# NOTE(review): the "dkrr-dpr-tqa-retriever-dpr-tqa-*" keys point at archives
# named "...-retriever-tqa-*" (no "dpr-"); left as-is, confirm this is intended.
QUERY_INFO = {
    "tct_colbert-msmarco-passage-dev-subset": {
        "description": "MS MARCO passage dev set queries encoded by TCT-ColBERT",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-tct_colbert-msmarco-passage-dev-subset-20210419-9323ec.tar.gz",
        ],
        "md5": "b2fe6494241639153f26cc61acf3b39d",
        "size (bytes)": 20078757,
        "total_queries": 6980,
        "downloaded": False
    },
    "tct_colbert-v2-msmarco-passage-dev-subset": {
        "description": "MS MARCO passage dev set queries encoded by TCT-ColBERT V2",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-tct_colbert-v2-msmarco-passage-dev-subset-20210608-5f341b.tar.gz",
        ],
        "md5": "ee8d76e596aef02c5027a2ffd0ff66f8",
        "size (bytes)": 20072992,
        "total_queries": 6980,
        "downloaded": False
    },
    "tct_colbert-v2-hn-msmarco-passage-dev-subset": {
        "description": "MS MARCO passage dev set queries encoded by TCT-ColBERT V2 HN",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-tct_colbert-v2-hn-msmarco-passage-dev-subset-20210608-5f341b.tar.gz",
        ],
        "md5": "f7e39cf2cd3ee53f7f8f2e0a1821431c",
        "size (bytes)": 20074411,
        "total_queries": 6980,
        "downloaded": False
    },
    "tct_colbert-v2-hnp-msmarco-passage-dev-subset": {
        "description": "MS MARCO passage dev set queries encoded by TCT-ColBERT V2 HN+",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-tct_colbert-v2-hnp-msmarco-passage-dev-subset-20210608-5f341b.tar.gz",
        ],
        "md5": "bed8036475774d12915c8af2a44612f4",
        "size (bytes)": 20078958,
        "total_queries": 6980,
        "downloaded": False
    },
    "tct_colbert-v2-hnp-dl19-passage": {
        "description": "TREC DL19-passage queries encoded by TCT-ColBERT V2 HN+",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-tct_colbert-v2-hnp-dl19-passage-20230124-99b795.tar.gz",
        ],
        "md5": "ee945fb0a5b17cba4e2e5d51318fbe05",
        "size (bytes)": 125193,
        "total_queries": 43,
        "downloaded": False
    },
    "tct_colbert-v2-hnp-dl20": {
        "description": "TREC DL20 queries encoded by TCT-ColBERT V2 HN+",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-tct_colbert-v2-hnp-dl20-passage-20230124-99b795.tar.gz",
        ],
        "md5": "b940d3d38cf5a50a9467a4aa7a59d226",
        "size (bytes)": 577645,
        "total_queries": 200,
        "downloaded": False
    },
    "ance-msmarco-passage-dev-subset": {
        "description": "MS MARCO passage dev set queries encoded by ANCE",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-ance-msmarco-passage-dev-subset-20210419-9323ec.tar.gz",
        ],
        "md5": "adad81bb1495eff2f0463e809ecc01b8",
        "size (bytes)": 19965095,
        "total_queries": 6980,
        "downloaded": False
    },
    "ance-dl19-passage": {
        "description": "TREC DL19 passage queries encoded by ANCE",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-ance-dl19-passage-20230124-99b79.tar.gz",
        ],
        "md5": "828714ef5481dc49686e14b61881ba06",
        "size (bytes)": 124468,
        "total_queries": 43,
        "downloaded": False
    },
    "ance-dl20": {
        "description": "TREC DL20 queries encoded by ANCE",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-ance-dl20-passage-20230124-99b79.tar.gz",
        ],
        "md5": "79acea9812a5c20d0d0817b07b348d15",
        "size (bytes)": 574183,
        "total_queries": 200,
        "downloaded": False
    },
    "tct_colbert-msmarco-doc-dev": {
        "description": "MS MARCO Document dev set queries encoded by TCT-ColBERT zero-shot",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-tct_colbert-msmarco-doc-dev-20210419-9323ec.tar.gz",
        ],
        "md5": "565fe57f92b229643b68fa3263f089a9",
        "size (bytes)": 14940124,
        "total_queries": 6980,
        "downloaded": False
    },
    "ance_maxp-msmarco-doc-dev": {
        "description": "MS MARCO Document dev set queries encoded by ANCE maxp",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-ance_maxp-msmarco-doc-dev-20210419-9323ec.tar.gz",
        ],
        "md5": "3d41ae797cb97e42649c4f4fa7b97d56",
        "size (bytes)": 14854155,
        "total_queries": 6980,
        "downloaded": False
    },
    "sbert-msmarco-passage-dev-subset": {
        "description": "MS MARCO passage dev set queries encoded by SBERT",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-sbert-msmarco-passage-dev-subset-20210419-9323ec.tar.gz",
        ],
        "md5": "dc0d09a0f5803824c1ad46a39417aa1e",
        "size (bytes)": 20058701,
        "total_queries": 6980,
        "downloaded": False
    },
    # The three distilbert_kd descriptions previously said "SBERT" (copied from
    # the entry above); the key and URL both identify the encoder as
    # distilbert_kd, so the descriptions now match.
    "distilbert_kd-msmarco-passage-dev-subset": {
        "description": "MS MARCO passage dev set queries encoded by DistilBERT KD",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-distilbert_kd-msmarco-passage-dev-subset-20210419-9323ec.tar.gz",
        ],
        "md5": "4706ec91183eefa9771e9311fe4799e0",
        "size (bytes)": 20013009,
        "total_queries": 6980,
        "downloaded": False
    },
    "distilbert_kd-dl19-passage": {
        "description": "TREC DL19 passage queries encoded by DistilBERT KD",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-distilbert_kd-dl19-passage-20230124-99b79.tar.gz",
        ],
        "md5": "c9fe8c8112a7d4fcda1aa606af77e66a",
        "size (bytes)": 124760,
        "total_queries": 43,
        "downloaded": False
    },
    "distilbert_kd-dl20": {
        "description": "TREC DL20 queries encoded by DistilBERT KD",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-distilbert_kd-dl20-passage-20230124-99b79.tar.gz",
        ],
        "md5": "09fe19984515145a78183a98e44bd699",
        "size (bytes)": 575682,
        "total_queries": 200,
        "downloaded": False
    },
    "distilbert_tas_b-msmarco-passage-dev-subset": {
        "description": "MS MARCO passage dev set queries encoded by TAS-B",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-distilbert_dot_tas_b_b256-msmarco-passage-dev-subset-20210527-63276f.tar.gz",
        ],
        "md5": "17a3f81de7ba497728050b83733b1c46",
        "size (bytes)": 20016799,
        "total_queries": 6980,
        "downloaded": False
    },
    "distilbert_tas_b-dl19-passage": {
        "description": "TREC DL19 passage queries encoded by TAS-B",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-distilbert_dot_tas_b_b256-dl19-passage-20230124-99b795.tar.gz",
        ],
        "md5": "a0a23a1be77e6e9e5dfacf32dfcd5e9b",
        "size (bytes)": 124809,
        "total_queries": 43,
        "downloaded": False
    },
    "distilbert_tas_b-dl20": {
        "description": "TREC DL20 queries encoded by TAS-B",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-distilbert_dot_tas_b_b256-dl20-passage-20230124-99b795.tar.gz",
        ],
        "md5": "8ffb4d5a17a2c028fb5065ef8a394ab3",
        "size (bytes)": 575875,
        "total_queries": 200,
        "downloaded": False
    },
    "dpr_multi-nq-dev": {
        "description": "Natural Question dev set questions encoded by DPR question encoder trained on multiset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dpr_multi-nq-dev-20210419-9323ec.tar.gz",
        ],
        "md5": "c2fd32438129e4994ce2ce71e08de875",
        "size (bytes)": 25129398,
        "total_queries": 8757,
        "downloaded": False
    },
    "dpr_multi-nq-test": {
        "description": "Natural Question test set questions encoded by DPR question encoder trained on multiset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dpr_multi-nq-test-20210419-9323ec.tar.gz",
        ],
        "md5": "1791f1ed078beb3a00847f75023eb020",
        "size (bytes)": 10365005,
        "total_queries": 3610,
        "downloaded": False
    },
    "ance_multi-nq-dev": {
        "description": "Natural Question dev set questions encoded by ANCE question encoder trained on multiset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-ance_multi-nq-dev-20210419-9323ec.tar.gz",
        ],
        "md5": "a3ed32ec8d5a474f61e3c3a9968b26fd",
        "size (bytes)": 25163934,
        "total_queries": 8757,
        "downloaded": False
    },
    "ance_multi-nq-test": {
        "description": "Natural Question test set questions encoded by ANCE question encoder trained on multiset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-ance_multi-nq-test-20210419-9323ec.tar.gz",
        ],
        "md5": "a356202b7c8f73758732c893a76a8005",
        "size (bytes)": 10379384,
        "total_queries": 3610,
        "downloaded": False
    },
    "dpr_multi-trivia-dev": {
        "description": "TriviaQA dev set questions encoded by DPR question encoder trained on multiset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dpr_multi-trivia-dev-20210419-9323ec.tar.gz",
        ],
        "md5": "efac7b71ef52ca073331e896089456a4",
        "size (bytes)": 25517034,
        "total_queries": 8837,
        "downloaded": False
    },
    "dpr_multi-trivia-test": {
        "description": "TriviaQA test set questions encoded by DPR question encoder trained on multiset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dpr_multi-trivia-test-20210419-9323ec.tar.gz",
        ],
        "md5": "01e95455d55d0495d806549f04a02c24",
        "size (bytes)": 32664437,
        "total_queries": 11313,
        "downloaded": False
    },
    "ance_multi-trivia-dev": {
        "description": "TriviaQA dev set questions encoded by ANCE question encoder trained on multiset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-ance_multi-trivia-dev-20210419-9323ec.tar.gz",
        ],
        "md5": "bd88499a5785b15ba702173cc0e91417",
        "size (bytes)": 25559775,
        "total_queries": 8837,
        "downloaded": False
    },
    "ance_multi-trivia-test": {
        "description": "TriviaQA test set questions encoded by ANCE question encoder trained on multiset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-ance_multi-trivia-test-20210419-9323ec.tar.gz",
        ],
        "md5": "3844dfb7f8feb6b064fa48775a35c6ee",
        "size (bytes)": 32717910,
        "total_queries": 11313,
        "downloaded": False
    },
    "dpr_multi-wq-test": {
        "description": "Web Questions test set questions encoded by DPR question encoder trained on multiset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dpr_multi-wq-test-20210419-9323ec.tar.gz",
        ],
        "md5": "19aa721632d05afe031cc2da83a9a5a5",
        "size (bytes)": 5826854,
        "total_queries": 2032,
        "downloaded": False
    },
    # NOTE(review): the key says "test" while the description says "dev set";
    # left unchanged -- confirm which split this archive actually holds.
    "dpr_multi-squad-test": {
        "description": "SQUAD dev set questions encoded by DPR question encoder trained on multiset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dpr_multi-squad-test-20210419-9323ec.tar.gz",
        ],
        "md5": "d11e0f801a488d51ad2a63b0748f4ae0",
        "size (bytes)": 30328268,
        "total_queries": 10570,
        "downloaded": False
    },
    "dpr_multi-curated-test": {
        "description": "CuratedTREC test set questions encoded by DPR question encoder trained on multiset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dpr_multi-curated-test-20210419-9323ec.tar.gz",
        ],
        "md5": "d1737d3ec5a080d93350ae76b02c7fd1",
        "size (bytes)": 1995280,
        "total_queries": 694,
        "downloaded": False
    },
    "dpr_single_nq-nq-dev": {
        "description": "NQ dev set questions encoded by DPR question encoder trained on NQ dataset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dpr_single_nq-nq-dev-20210419-9323ec.tar.gz",
        ],
        "md5": "1a992f8d5336dc8654bba5ab7e375ebe",
        "size (bytes)": 25123288,
        "total_queries": 8757,
        "downloaded": False
    },
    "dpr_single_nq-nq-test": {
        "description": "NQ test set questions encoded by DPR question encoder trained on NQ dataset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dpr_single_nq-nq-test-20210419-9323ec.tar.gz",
        ],
        "md5": "e64bb009b6ba8bfe40d4b9967fd69240",
        "size (bytes)": 10362252,
        "total_queries": 3610,
        "downloaded": False
    },
    "bpr_single_nq-nq-test": {
        "description": "NQ test set questions encoded by BPR question encoder trained on NQ dataset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-bpr_single_nq-nq-test-20210827-8a8f75.tar.gz",
        ],
        "md5": "b139d5a096ad52d2abc66fb54ec66158",
        "size (bytes)": 11094680,
        "total_queries": 3610,
        "downloaded": False
    },
    "dkrr-dpr-nq-retriever-dpr-nq-dev": {
        "description": "DPR-NQ dev set questions encoded by castorini/dkrr-dpr-nq-retriever trained on NQ dataset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dkrr-dpr-nq-retriever-dpr-nq-dev-20220304-7ffa54.tar.gz",
        ],
        "md5": "fe1276ae841bd5be6f3e0daac144273a",
        "size (bytes)": 25146740,
        "total_queries": 8757,
        "downloaded": False
    },
    "dkrr-dpr-nq-retriever-dpr-nq-test": {
        "description": "DPR-NQ test set questions encoded by castorini/dkrr-dpr-nq-retriever trained on NQ dataset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dkrr-dpr-nq-retriever-dpr-nq-test-20220304-7ffa54.tar.gz",
        ],
        "md5": "6c7793a0a89e7d10309a6973c52de326",
        "size (bytes)": 10370414,
        "total_queries": 3610,
        "downloaded": False
    },
    "dkrr-dpr-nq-retriever-nq-dev": {
        "description": "NQ dev set questions encoded by castorini/dkrr-dpr-nq-retriever trained on NQ dataset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dkrr-dpr-nq-retriever-nq-dev-20220304-7ffa54.tar.gz",
        ],
        "md5": "3c84c7fb6569d7690d5c38be61d3a5a4",
        "size (bytes)": 25146526,
        "total_queries": 8757,
        "downloaded": False
    },
    "dkrr-dpr-nq-retriever-nq-test": {
        "description": "NQ test set questions encoded by castorini/dkrr-dpr-nq-retriever trained on NQ dataset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dkrr-dpr-nq-retriever-nq-test-20220304-7ffa54.tar.gz",
        ],
        "md5": "cd3c30fc6dfde160983167b59acb17a3",
        "size (bytes)": 10370264,
        "total_queries": 3610,
        "downloaded": False
    },
    "dkrr-dpr-tqa-retriever-dpr-tqa-dev": {
        "description": "TriviaQA dev set questions encoded by castorini/dkrr-dpr-tqa-retriever trained on TriviaQA dataset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dkrr-dpr-tqa-retriever-tqa-dev-20220304-7ffa54.tar.gz",
        ],
        "md5": "f9ca5060cf7794b681cd4fe3d3708c4d",
        "size (bytes)": 25540932,
        "total_queries": 8837,
        "downloaded": False
    },
    "dkrr-dpr-tqa-retriever-dpr-tqa-test": {
        "description": "TriviaQA test set questions encoded by castorini/dkrr-dpr-tqa-retriever trained on TriviaQA dataset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dkrr-dpr-tqa-retriever-tqa-test-20220304-7ffa54.tar.gz",
        ],
        "md5": "9cbd030c3a4478b7eb8356844bacc45b",
        "size (bytes)": 32688909,
        "total_queries": 11313,
        "downloaded": False
    },
    "wiki-6-3-all-dpr2-multi-nq-test": {
        "description": "NQ test set questions encoded by castorini/wiki-all-6-3-multi-dpr2-query-encoder.",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-wiki-all-6-3-dpr2-multi-retriever-nq-test-20230103-186fa7.tar.gz",
        ],
        "md5": "2632ca1392a33e975d505acd5090250a",
        "size (bytes)": 10354577,
        "total_queries": 3610,
        "downloaded": False
    },
    "wiki-6-3-all-dpr2-multi-dpr-trivia-test": {
        "description": "TriviaQA test set questions encoded by castorini/wiki-all-6-3-multi-dpr2-query-encoder.",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-wiki-all-6-3-dpr2-multi-retriever-dpr-trivia-test-20230103-186fa7.tar.gz",
        ],
        "md5": "d0abf8ff598daaec35acd972a465b0e2",
        "size (bytes)": 32620950,
        "total_queries": 11313,
        "downloaded": False
    }
}