|
import os
|
|
import random
|
|
|
|
from colbert.utils.parser import Arguments
|
|
from colbert.utils.runs import Run
|
|
|
|
from colbert.evaluation.loaders import load_colbert, load_qrels, load_queries
|
|
from colbert.indexing.faiss import get_faiss_index_name
|
|
from colbert.ranking.retrieval import retrieve
|
|
from colbert.ranking.batch_retrieval import batch_retrieve
|
|
|
|
|
|
def main():
    """Parse command-line options and run ColBERT end-to-end retrieval.

    The function seeds Python's ``random`` module with a fixed value,
    builds the argument parser, normalises a few parsed options and then,
    inside ``Run.context()``, loads the model, qrels and queries before
    dispatching to either ``batch_retrieve`` or ``retrieve``.

    Option normalisation performed here:

    * ``--depth``: a non-positive value is replaced by ``None``.
    * ``--part-range``: a string of the form ``"<start>..<end>"`` is
      converted to ``range(start, end)``.
    """
    random.seed(12345)

    parser = Arguments(description='End-to-end retrieval and ranking with ColBERT.')

    # Option groups supplied by the project's Arguments helper.
    parser.add_model_parameters()
    parser.add_model_inference_parameters()
    parser.add_ranking_input()
    parser.add_retrieval_input()

    # Options specific to this script.
    parser.add_argument('--faiss_name', dest='faiss_name', default=None, type=str)
    parser.add_argument('--faiss_depth', dest='faiss_depth', default=1024, type=int)
    parser.add_argument('--part-range', dest='part_range', default=None, type=str)
    parser.add_argument('--batch', dest='batch', default=False, action='store_true')
    parser.add_argument('--depth', dest='depth', default=1000, type=int)

    args = parser.parse()

    # A non-positive depth is stored as None
    # (presumably "no cut-off" downstream -- confirm in the retrieval code).
    if args.depth > 0:
        args.depth = args.depth
    else:
        args.depth = None

    if args.part_range:
        # "<start>..<end>" -> range(start, end)
        first_part, last_part = map(int, args.part_range.split('..'))
        args.part_range = range(first_part, last_part)

    with Run.context():
        args.colbert, args.checkpoint = load_colbert(args)
        args.qrels = load_qrels(args.qrels)
        args.queries = load_queries(args.queries)

        args.index_path = os.path.join(args.index_root, args.index_name)

        # An explicit --faiss_name wins; otherwise the name is derived from args.
        faiss_file = (
            args.faiss_name
            if args.faiss_name is not None
            else get_faiss_index_name(args)
        )
        args.faiss_index_path = os.path.join(args.index_path, faiss_file)

        run_retrieval = batch_retrieve if args.batch else retrieve
        run_retrieval(args)
|
|
|
|
|
|
# Run the retrieval pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
|
|