NetsPresso_QA / encode_8shards_cuda1.sh
geonmin-kim's picture
Upload folder using huggingface_hub
d6585f5
raw
history blame contribute delete
844 Bytes
#!/bin/bash
#
# Encode a contiguous range of corpus shards into dense embeddings with
# pyserini, running on a single CUDA device.
#
# For every shard index in [START_IDX, END_IDX] the script invokes
# `python -m pyserini.encode`, reading from
#   ${SHARDED_CORPUS_PREFIX}/shard_<idx>
# and writing FAISS-format embeddings to
#   ${SHARDED_EMBEDDING_PREFIX}/shard_<idx>
#
# Usage: edit the constants below, then run the script with no arguments.
#
# Exit status: 0 if every shard was encoded successfully, 1 otherwise.
# A failing shard does not stop the remaining shards from being attempted.

# -e is deliberately omitted: failures are handled per shard below so that
# one bad shard does not prevent the others from being encoded.
set -uo pipefail

readonly SHARDED_CORPUS_PREFIX="/root/Corpus/CAsT22_msmarcov2_kilt_flattened_8shards"
readonly SHARDED_EMBEDDING_PREFIX="/ssd3/geonminkim/indexes/CAsT_21_22_msmarcov2_kilt/dense"
readonly START_IDX=2  # first shard index handled by this run (inclusive)
readonly END_IDX=3    # last shard index handled by this run (inclusive)
readonly SHARD_NUM=8  # value forwarded to pyserini's --shard-num
readonly GPU_ID=1     # CUDA device ordinal used for encoding

failed_count=0
failed_list=""

for ((SHARD_IDX = START_IDX; SHARD_IDX <= END_IDX; SHARD_IDX++)); do
  echo "encoding for SHARD_IDX = $SHARD_IDX"

  # NOTE(review): --corpus already points at a per-shard directory, yet
  # --shard-id/--shard-num are passed as well. If pyserini applies these
  # flags to sub-shard the given corpus, only a fraction of each directory
  # would be encoded -- confirm this combination is intended.
  encode_args=(
    input
    --corpus "${SHARDED_CORPUS_PREFIX}/shard_${SHARD_IDX}"
    --fields text
    --shard-id "${SHARD_IDX}"
    --shard-num "${SHARD_NUM}"
    output
    --embeddings "${SHARDED_EMBEDDING_PREFIX}/shard_${SHARD_IDX}"
    --to-faiss
    encoder
    --encoder castorini/tct_colbert-v2-msmarco-cqe
    --fields text
    --device "cuda:${GPU_ID}"
    --batch 128
    --fp16
  )

  # Keep going on failure, but remember it so the script's exit status
  # reflects the whole run rather than only the last shard.
  if ! python -m pyserini.encode "${encode_args[@]}"; then
    printf 'error: encoding failed for SHARD_IDX = %s\n' "${SHARD_IDX}" >&2
    failed_count=$((failed_count + 1))
    failed_list="${failed_list} ${SHARD_IDX}"
  fi
done

if ((failed_count > 0)); then
  printf 'error: %d shard(s) failed:%s\n' "${failed_count}" "${failed_list}" >&2
  exit 1
fi