#!/bin/bash
#
# Encode a range of pre-sharded corpus directories into dense embeddings by
# invoking `python -m pyserini.encode` once per shard.
#
# For every shard index in [START_IDX, END_IDX] the script reads
#   ${SHARDED_CORPUS_PREFIX}/shard_<idx>
# and writes the encoder output to
#   ${SHARDED_EMBEDDING_PREFIX}/shard_<idx>
#
# Edit START_IDX / END_IDX / GPU_ID below to run a different slice of the
# shards on a different GPU.
#
# A failing shard does not abort the loop; the script's exit status is the
# status of the last encoder invocation.

# Directory prefix holding one sub-directory per corpus shard (shard_0, ...).
SHARDED_CORPUS_PREFIX="/root/Corpus/CAsT22_msmarcov2_kilt_flattened_8shards"
# Directory prefix under which per-shard embeddings are written.
SHARDED_EMBEDDING_PREFIX="/ssd3/geonminkim/indexes/CAsT_21_22_msmarcov2_kilt/dense"

# Inclusive range of shard indices processed by this run.
START_IDX=6
END_IDX=7
# Total number of shards, forwarded to the encoder as --shard-num.
SHARD_NUM=8
# CUDA device index, forwarded to the encoder as cuda:<GPU_ID>.
GPU_ID=3

for ((SHARD_IDX = START_IDX; SHARD_IDX <= END_IDX; SHARD_IDX++)); do
  echo "encoding for SHARD_IDX = $SHARD_IDX"

  # NOTE(review): the corpus path is already shard-specific, yet --shard-id /
  # --shard-num are passed as well. If the encoder sub-shards its input based
  # on these flags, only a fraction of each directory would be encoded --
  # confirm against the pyserini.encode documentation before relying on the
  # output being complete.
  python -m pyserini.encode \
    input --corpus "${SHARDED_CORPUS_PREFIX}/shard_${SHARD_IDX}" \
          --fields text \
          --shard-id "${SHARD_IDX}" \
          --shard-num "${SHARD_NUM}" \
    output --embeddings "${SHARDED_EMBEDDING_PREFIX}/shard_${SHARD_IDX}" \
           --to-faiss \
    encoder --encoder castorini/tct_colbert-v2-msmarco-cqe \
            --fields text \
            --device "cuda:${GPU_ID}" \
            --batch 128 \
            --fp16
done