Spaces:
Runtime error
Runtime error
#
# Pyserini: Reproducible IR research with sparse and dense representations
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Registry of pre-encoded query sets, keyed by query-set name.
#
# Every entry is a dict with the same six fields:
#   "description"   - human-readable summary of the query set and encoder
#   "urls"          - list of download locations for the .tar.gz archive
#   "md5"           - MD5 hex digest recorded for the archive
#   "size (bytes)"  - archive size in bytes
#   "total_queries" - number of queries in the set
#   "downloaded"    - flag, initialised to False for every entry
#                     (presumably flipped by a download helper; verify against caller)
#
# NOTE(review): the ANCE and DistilBERT KD DL19/DL20 archive names end in
# "99b79" while the TCT-ColBERT and TAS-B ones end in "99b795"; likewise the
# "dkrr-dpr-tqa-retriever-dpr-tqa-*" and "wiki-6-3-all-*" keys do not mirror
# their archive names exactly. These are kept as-is because the file names
# cannot be checked from here - confirm against the hosting repository.
QUERY_INFO = {
    "tct_colbert-msmarco-passage-dev-subset": {
        "description": "MS MARCO passage dev set queries encoded by TCT-ColBERT",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-tct_colbert-msmarco-passage-dev-subset-20210419-9323ec.tar.gz",
        ],
        "md5": "b2fe6494241639153f26cc61acf3b39d",
        "size (bytes)": 20078757,
        "total_queries": 6980,
        "downloaded": False
    },
    "tct_colbert-v2-msmarco-passage-dev-subset": {
        "description": "MS MARCO passage dev set queries encoded by TCT-ColBERT V2",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-tct_colbert-v2-msmarco-passage-dev-subset-20210608-5f341b.tar.gz",
        ],
        "md5": "ee8d76e596aef02c5027a2ffd0ff66f8",
        "size (bytes)": 20072992,
        "total_queries": 6980,
        "downloaded": False
    },
    "tct_colbert-v2-hn-msmarco-passage-dev-subset": {
        "description": "MS MARCO passage dev set queries encoded by TCT-ColBERT V2 HN",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-tct_colbert-v2-hn-msmarco-passage-dev-subset-20210608-5f341b.tar.gz",
        ],
        "md5": "f7e39cf2cd3ee53f7f8f2e0a1821431c",
        "size (bytes)": 20074411,
        "total_queries": 6980,
        "downloaded": False
    },
    "tct_colbert-v2-hnp-msmarco-passage-dev-subset": {
        "description": "MS MARCO passage dev set queries encoded by TCT-ColBERT V2 HN+",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-tct_colbert-v2-hnp-msmarco-passage-dev-subset-20210608-5f341b.tar.gz",
        ],
        "md5": "bed8036475774d12915c8af2a44612f4",
        "size (bytes)": 20078958,
        "total_queries": 6980,
        "downloaded": False
    },
    "tct_colbert-v2-hnp-dl19-passage": {
        "description": "TREC DL19-passage queries encoded by TCT-ColBERT V2 HN+",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-tct_colbert-v2-hnp-dl19-passage-20230124-99b795.tar.gz",
        ],
        "md5": "ee945fb0a5b17cba4e2e5d51318fbe05",
        "size (bytes)": 125193,
        "total_queries": 43,
        "downloaded": False
    },
    "tct_colbert-v2-hnp-dl20": {
        "description": "TREC DL20 queries encoded by TCT-ColBERT V2 HN+",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-tct_colbert-v2-hnp-dl20-passage-20230124-99b795.tar.gz",
        ],
        "md5": "b940d3d38cf5a50a9467a4aa7a59d226",
        "size (bytes)": 577645,
        "total_queries": 200,
        "downloaded": False
    },
    "ance-msmarco-passage-dev-subset": {
        "description": "MS MARCO passage dev set queries encoded by ANCE",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-ance-msmarco-passage-dev-subset-20210419-9323ec.tar.gz",
        ],
        "md5": "adad81bb1495eff2f0463e809ecc01b8",
        "size (bytes)": 19965095,
        "total_queries": 6980,
        "downloaded": False
    },
    "ance-dl19-passage": {
        "description": "TREC DL19 passage queries encoded by ANCE",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-ance-dl19-passage-20230124-99b79.tar.gz",
        ],
        "md5": "828714ef5481dc49686e14b61881ba06",
        "size (bytes)": 124468,
        "total_queries": 43,
        "downloaded": False
    },
    "ance-dl20": {
        "description": "TREC DL20 queries encoded by ANCE",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-ance-dl20-passage-20230124-99b79.tar.gz",
        ],
        "md5": "79acea9812a5c20d0d0817b07b348d15",
        "size (bytes)": 574183,
        "total_queries": 200,
        "downloaded": False
    },
    "tct_colbert-msmarco-doc-dev": {
        "description": "MS MARCO Document dev set queries encoded by TCT-ColBERT zero-shot",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-tct_colbert-msmarco-doc-dev-20210419-9323ec.tar.gz",
        ],
        "md5": "565fe57f92b229643b68fa3263f089a9",
        "size (bytes)": 14940124,
        "total_queries": 6980,
        "downloaded": False
    },
    "ance_maxp-msmarco-doc-dev": {
        "description": "MS MARCO Document dev set queries encoded by ANCE maxp",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-ance_maxp-msmarco-doc-dev-20210419-9323ec.tar.gz",
        ],
        "md5": "3d41ae797cb97e42649c4f4fa7b97d56",
        "size (bytes)": 14854155,
        "total_queries": 6980,
        "downloaded": False
    },
    "sbert-msmarco-passage-dev-subset": {
        "description": "MS MARCO passage dev set queries encoded by SBERT",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-sbert-msmarco-passage-dev-subset-20210419-9323ec.tar.gz",
        ],
        "md5": "dc0d09a0f5803824c1ad46a39417aa1e",
        "size (bytes)": 20058701,
        "total_queries": 6980,
        "downloaded": False
    },
    # The three distilbert_kd entries previously reused the SBERT description;
    # they now name the encoder that their key and archive refer to.
    "distilbert_kd-msmarco-passage-dev-subset": {
        "description": "MS MARCO passage dev set queries encoded by DistilBERT KD",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-distilbert_kd-msmarco-passage-dev-subset-20210419-9323ec.tar.gz",
        ],
        "md5": "4706ec91183eefa9771e9311fe4799e0",
        "size (bytes)": 20013009,
        "total_queries": 6980,
        "downloaded": False
    },
    "distilbert_kd-dl19-passage": {
        "description": "TREC DL19 passage queries encoded by DistilBERT KD",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-distilbert_kd-dl19-passage-20230124-99b79.tar.gz",
        ],
        "md5": "c9fe8c8112a7d4fcda1aa606af77e66a",
        "size (bytes)": 124760,
        "total_queries": 43,
        "downloaded": False
    },
    "distilbert_kd-dl20": {
        "description": "TREC DL20 queries encoded by DistilBERT KD",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-distilbert_kd-dl20-passage-20230124-99b79.tar.gz",
        ],
        "md5": "09fe19984515145a78183a98e44bd699",
        "size (bytes)": 575682,
        "total_queries": 200,
        "downloaded": False
    },
    "distilbert_tas_b-msmarco-passage-dev-subset": {
        "description": "MS MARCO passage dev set queries encoded by TAS-B",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-distilbert_dot_tas_b_b256-msmarco-passage-dev-subset-20210527-63276f.tar.gz",
        ],
        "md5": "17a3f81de7ba497728050b83733b1c46",
        "size (bytes)": 20016799,
        "total_queries": 6980,
        "downloaded": False
    },
    "distilbert_tas_b-dl19-passage": {
        "description": "TREC DL19 passage queries encoded by TAS-B",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-distilbert_dot_tas_b_b256-dl19-passage-20230124-99b795.tar.gz",
        ],
        "md5": "a0a23a1be77e6e9e5dfacf32dfcd5e9b",
        "size (bytes)": 124809,
        "total_queries": 43,
        "downloaded": False
    },
    "distilbert_tas_b-dl20": {
        "description": "TREC DL20 queries encoded by TAS-B",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-distilbert_dot_tas_b_b256-dl20-passage-20230124-99b795.tar.gz",
        ],
        "md5": "8ffb4d5a17a2c028fb5065ef8a394ab3",
        "size (bytes)": 575875,
        "total_queries": 200,
        "downloaded": False
    },
    "dpr_multi-nq-dev": {
        "description": "Natural Question dev set questions encoded by DPR question encoder trained on multiset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dpr_multi-nq-dev-20210419-9323ec.tar.gz",
        ],
        "md5": "c2fd32438129e4994ce2ce71e08de875",
        "size (bytes)": 25129398,
        "total_queries": 8757,
        "downloaded": False
    },
    "dpr_multi-nq-test": {
        "description": "Natural Question test set questions encoded by DPR question encoder trained on multiset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dpr_multi-nq-test-20210419-9323ec.tar.gz",
        ],
        "md5": "1791f1ed078beb3a00847f75023eb020",
        "size (bytes)": 10365005,
        "total_queries": 3610,
        "downloaded": False
    },
    "ance_multi-nq-dev": {
        "description": "Natural Question dev set questions encoded by ANCE question encoder trained on multiset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-ance_multi-nq-dev-20210419-9323ec.tar.gz",
        ],
        "md5": "a3ed32ec8d5a474f61e3c3a9968b26fd",
        "size (bytes)": 25163934,
        "total_queries": 8757,
        "downloaded": False
    },
    "ance_multi-nq-test": {
        "description": "Natural Question test set questions encoded by ANCE question encoder trained on multiset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-ance_multi-nq-test-20210419-9323ec.tar.gz",
        ],
        "md5": "a356202b7c8f73758732c893a76a8005",
        "size (bytes)": 10379384,
        "total_queries": 3610,
        "downloaded": False
    },
    "dpr_multi-trivia-dev": {
        "description": "TriviaQA dev set questions encoded by DPR question encoder trained on multiset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dpr_multi-trivia-dev-20210419-9323ec.tar.gz",
        ],
        "md5": "efac7b71ef52ca073331e896089456a4",
        "size (bytes)": 25517034,
        "total_queries": 8837,
        "downloaded": False
    },
    "dpr_multi-trivia-test": {
        "description": "TriviaQA test set questions encoded by DPR question encoder trained on multiset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dpr_multi-trivia-test-20210419-9323ec.tar.gz",
        ],
        "md5": "01e95455d55d0495d806549f04a02c24",
        "size (bytes)": 32664437,
        "total_queries": 11313,
        "downloaded": False
    },
    "ance_multi-trivia-dev": {
        "description": "TriviaQA dev set questions encoded by ANCE question encoder trained on multiset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-ance_multi-trivia-dev-20210419-9323ec.tar.gz",
        ],
        "md5": "bd88499a5785b15ba702173cc0e91417",
        "size (bytes)": 25559775,
        "total_queries": 8837,
        "downloaded": False
    },
    "ance_multi-trivia-test": {
        "description": "TriviaQA test set questions encoded by ANCE question encoder trained on multiset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-ance_multi-trivia-test-20210419-9323ec.tar.gz",
        ],
        "md5": "3844dfb7f8feb6b064fa48775a35c6ee",
        "size (bytes)": 32717910,
        "total_queries": 11313,
        "downloaded": False
    },
    "dpr_multi-wq-test": {
        "description": "Web Questions test set questions encoded by DPR question encoder trained on multiset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dpr_multi-wq-test-20210419-9323ec.tar.gz",
        ],
        "md5": "19aa721632d05afe031cc2da83a9a5a5",
        "size (bytes)": 5826854,
        "total_queries": 2032,
        "downloaded": False
    },
    "dpr_multi-squad-test": {
        "description": "SQUAD dev set questions encoded by DPR question encoder trained on multiset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dpr_multi-squad-test-20210419-9323ec.tar.gz",
        ],
        "md5": "d11e0f801a488d51ad2a63b0748f4ae0",
        "size (bytes)": 30328268,
        "total_queries": 10570,
        "downloaded": False
    },
    "dpr_multi-curated-test": {
        "description": "CuratedTREC test set questions encoded by DPR question encoder trained on multiset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dpr_multi-curated-test-20210419-9323ec.tar.gz",
        ],
        "md5": "d1737d3ec5a080d93350ae76b02c7fd1",
        "size (bytes)": 1995280,
        "total_queries": 694,
        "downloaded": False
    },
    "dpr_single_nq-nq-dev": {
        "description": "NQ dev set questions encoded by DPR question encoder trained on NQ dataset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dpr_single_nq-nq-dev-20210419-9323ec.tar.gz",
        ],
        "md5": "1a992f8d5336dc8654bba5ab7e375ebe",
        "size (bytes)": 25123288,
        "total_queries": 8757,
        "downloaded": False
    },
    "dpr_single_nq-nq-test": {
        "description": "NQ test set questions encoded by DPR question encoder trained on NQ dataset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dpr_single_nq-nq-test-20210419-9323ec.tar.gz",
        ],
        "md5": "e64bb009b6ba8bfe40d4b9967fd69240",
        "size (bytes)": 10362252,
        "total_queries": 3610,
        "downloaded": False
    },
    "bpr_single_nq-nq-test": {
        "description": "NQ test set questions encoded by BPR question encoder trained on NQ dataset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-bpr_single_nq-nq-test-20210827-8a8f75.tar.gz",
        ],
        "md5": "b139d5a096ad52d2abc66fb54ec66158",
        "size (bytes)": 11094680,
        "total_queries": 3610,
        "downloaded": False
    },
    "dkrr-dpr-nq-retriever-dpr-nq-dev": {
        "description": "DPR-NQ dev set questions encoded by castorini/dkrr-dpr-nq-retriever trained on NQ dataset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dkrr-dpr-nq-retriever-dpr-nq-dev-20220304-7ffa54.tar.gz",
        ],
        "md5": "fe1276ae841bd5be6f3e0daac144273a",
        "size (bytes)": 25146740,
        "total_queries": 8757,
        "downloaded": False
    },
    "dkrr-dpr-nq-retriever-dpr-nq-test": {
        "description": "DPR-NQ test set questions encoded by castorini/dkrr-dpr-nq-retriever trained on NQ dataset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dkrr-dpr-nq-retriever-dpr-nq-test-20220304-7ffa54.tar.gz",
        ],
        "md5": "6c7793a0a89e7d10309a6973c52de326",
        "size (bytes)": 10370414,
        "total_queries": 3610,
        "downloaded": False
    },
    "dkrr-dpr-nq-retriever-nq-dev": {
        "description": "NQ dev set questions encoded by castorini/dkrr-dpr-nq-retriever trained on NQ dataset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dkrr-dpr-nq-retriever-nq-dev-20220304-7ffa54.tar.gz",
        ],
        "md5": "3c84c7fb6569d7690d5c38be61d3a5a4",
        "size (bytes)": 25146526,
        "total_queries": 8757,
        "downloaded": False
    },
    "dkrr-dpr-nq-retriever-nq-test": {
        "description": "NQ test set questions encoded by castorini/dkrr-dpr-nq-retriever trained on NQ dataset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dkrr-dpr-nq-retriever-nq-test-20220304-7ffa54.tar.gz",
        ],
        "md5": "cd3c30fc6dfde160983167b59acb17a3",
        "size (bytes)": 10370264,
        "total_queries": 3610,
        "downloaded": False
    },
    "dkrr-dpr-tqa-retriever-dpr-tqa-dev": {
        "description": "TriviaQA dev set questions encoded by castorini/dkrr-dpr-tqa-retriever trained on TriviaQA dataset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dkrr-dpr-tqa-retriever-tqa-dev-20220304-7ffa54.tar.gz",
        ],
        "md5": "f9ca5060cf7794b681cd4fe3d3708c4d",
        "size (bytes)": 25540932,
        "total_queries": 8837,
        "downloaded": False
    },
    "dkrr-dpr-tqa-retriever-dpr-tqa-test": {
        "description": "TriviaQA test set questions encoded by castorini/dkrr-dpr-tqa-retriever trained on TriviaQA dataset",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-dkrr-dpr-tqa-retriever-tqa-test-20220304-7ffa54.tar.gz",
        ],
        "md5": "9cbd030c3a4478b7eb8356844bacc45b",
        "size (bytes)": 32688909,
        "total_queries": 11313,
        "downloaded": False
    },
    "wiki-6-3-all-dpr2-multi-nq-test": {
        "description": "NQ test set questions encoded by castorini/wiki-all-6-3-multi-dpr2-query-encoder.",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-wiki-all-6-3-dpr2-multi-retriever-nq-test-20230103-186fa7.tar.gz",
        ],
        "md5": "2632ca1392a33e975d505acd5090250a",
        "size (bytes)": 10354577,
        "total_queries": 3610,
        "downloaded": False
    },
    "wiki-6-3-all-dpr2-multi-dpr-trivia-test": {
        "description": "TriviaQA test set questions encoded by castorini/wiki-all-6-3-multi-dpr2-query-encoder.",
        "urls": [
            "https://github.com/castorini/pyserini-data/raw/main/encoded-queries/query-embedding-wiki-all-6-3-dpr2-multi-retriever-dpr-trivia-test-20230103-186fa7.tar.gz",
        ],
        "md5": "d0abf8ff598daaec35acd972a465b0e2",
        "size (bytes)": 32620950,
        "total_queries": 11313,
        "downloaded": False
    }
}