# Page header captured along with this script: "Spaces: Runtime error" (not part of the script).
# Encode corpus shards into dense embeddings with pyserini, one
# `python -m pyserini.encode` invocation per shard index.
#
# Configuration:
#   SHARDED_CORPUS_PREFIX     directory containing the shard_<i> corpus inputs
#   SHARDED_EMBEDDING_PREFIX  directory receiving the shard_<i> embedding outputs
#   START_IDX / END_IDX       inclusive range of shard indices handled by this run
#   SHARD_NUM                 value forwarded to --shard-num
#   GPU_ID                    CUDA device index used for encoding

# Abort on the first failing command or unset variable, so a failed shard is
# not silently skipped while the loop moves on to the next one.
set -eu

SHARDED_CORPUS_PREFIX="/root/Corpus/CAsT22_msmarcov2_kilt_flattened_8shards"
SHARDED_EMBEDDING_PREFIX="/ssd3/geonminkim/indexes/CAsT_21_22_msmarcov2_kilt/dense"
START_IDX=0
END_IDX=1
SHARD_NUM=8
GPU_ID=0

SHARD_IDX=$START_IDX
while [ "$SHARD_IDX" -le "$END_IDX" ]; do
  printf 'encoding for SHARD_IDX = %s\n' "$SHARD_IDX"

  # NOTE(review): --corpus already points at a per-shard directory while
  # --shard-id/--shard-num are also passed; presumably the encoder then
  # sub-selects a slice of that directory. Confirm this is intended.
  python -m pyserini.encode \
    input --corpus "${SHARDED_CORPUS_PREFIX}/shard_${SHARD_IDX}" \
          --fields text \
          --shard-id "$SHARD_IDX" \
          --shard-num "$SHARD_NUM" \
    output --embeddings "${SHARDED_EMBEDDING_PREFIX}/shard_${SHARD_IDX}" \
           --to-faiss \
    encoder --encoder castorini/tct_colbert-v2-msmarco-cqe \
            --fields text \
            --device "cuda:${GPU_ID}" \
            --batch 128 \
            --fp16

  SHARD_IDX=$((SHARD_IDX + 1))
done