NetsPresso_QA / encode_8shards_cuda2.sh
geonmin-kim's picture
Upload folder using huggingface_hub
d6585f5
#!/bin/bash
# Encode a contiguous range of corpus shards into dense embeddings by calling
# `python -m pyserini.encode` once per shard, all on a single CUDA device.
#
# Every setting below may be overridden from the environment; the defaults
# reproduce the original hard-coded run (shards 4..5 on cuda:2). Example:
#   START_IDX=0 END_IDX=1 GPU_ID=0 bash encode_8shards_cuda2.sh
set -u  # treat an unset/misspelled variable as an error, not an empty string

# Prefix under which each input shard lives as shard_<idx>.
SHARDED_CORPUS_PREFIX="${SHARDED_CORPUS_PREFIX:-/root/Corpus/CAsT22_msmarcov2_kilt_flattened_8shards}"
# Prefix under which each shard's embeddings are written as shard_<idx>.
SHARDED_EMBEDDING_PREFIX="${SHARDED_EMBEDDING_PREFIX:-/ssd3/geonminkim/indexes/CAsT_21_22_msmarcov2_kilt/dense}"
# First and last shard index (both inclusive) handled by this invocation.
START_IDX="${START_IDX:-4}"
END_IDX="${END_IDX:-5}"
# Value forwarded to the encoder's --shard-num option.
SHARD_NUM="${SHARD_NUM:-8}"
# Index of the CUDA device, used as cuda:<GPU_ID>.
GPU_ID="${GPU_ID:-2}"
# Encode shards START_IDX..END_IDX (inclusive), one pyserini.encode run each.
# A failing shard does not stop the remaining shards; failures are collected
# and reported at the end so the script's exit status reflects all of them.
#
# NOTE(review): the corpus path already points at a per-shard location
# (shard_<idx>), yet --shard-id/--shard-num are passed as well. If the encoder
# further subdivides its input by these flags, only part of each shard would
# be encoded -- confirm against the pyserini.encode documentation.
failed_shards=()
for (( SHARD_IDX = START_IDX; SHARD_IDX <= END_IDX; SHARD_IDX++ )); do
  echo "encoding for SHARD_IDX = $SHARD_IDX"
  # `input`, `output` and `encoder` are literal section keywords of the
  # pyserini.encode command line; each is followed by that section's options.
  if ! python -m pyserini.encode \
    input --corpus "${SHARDED_CORPUS_PREFIX}/shard_${SHARD_IDX}" \
    --fields text \
    --shard-id "$SHARD_IDX" \
    --shard-num "$SHARD_NUM" \
    output --embeddings "${SHARDED_EMBEDDING_PREFIX}/shard_${SHARD_IDX}" \
    --to-faiss \
    encoder --encoder castorini/tct_colbert-v2-msmarco-cqe \
    --fields text \
    --device "cuda:${GPU_ID}" \
    --batch 128 \
    --fp16
  then
    echo "encoding FAILED for SHARD_IDX = $SHARD_IDX" >&2
    failed_shards+=("$SHARD_IDX")
  fi
done

# Surface any failure through the exit status instead of only the last run's.
if (( ${#failed_shards[@]} > 0 )); then
  echo "failed shards: ${failed_shards[*]}" >&2
  exit 1
fi