# Spaces:
# Build error
# Build error
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
#
# Sentence-retrieval evaluation for one language pair on the "-tatoeba" input
# directory: save encoder outputs for both sides of the pair, then run the
# retrieval-accuracy analysis over the saved outputs.
# All paths below are relative to the directory the script is run from.

# Stop at the first failing step (or on an unset variable) so the analysis
# never runs on missing or partial encoder outputs.
set -eu

# Language pair to evaluate.
source_lang=kk_KZ
target_lang=en_XX

# Model checkpoint and the resources passed alongside it.
MODEL=criss_checkpoints/criss.3rd.pt
SPM=criss_checkpoints/sentence.bpe.model
SPLIT=test
LANG_DICT=criss_checkpoints/lang_dict.txt

# Helper scripts invoked below.
ENCODER_ANALYSIS=sentence_retrieval/encoder_analysis.py
SAVE_ENCODER=save_encoder.py

# The checkpoint path is embedded in the output root, so outputs land under
# sentence_embeddings/criss_checkpoints/criss.3rd.pt/<src>-<tgt>/.
ENCODER_SAVE_ROOT="sentence_embeddings/${MODEL}"
DATA_DIR=data_tmp
INPUT_DIR="${DATA_DIR}/${source_lang}-${target_lang}-tatoeba"
ENCODER_SAVE_DIR="${ENCODER_SAVE_ROOT}/${source_lang}-${target_lang}"

# One output directory per language.
mkdir -p "${ENCODER_SAVE_DIR}/${target_lang}"
mkdir -p "${ENCODER_SAVE_DIR}/${source_lang}"

# Save encoder outputs for source sentences
python "${SAVE_ENCODER}" \
  "${INPUT_DIR}" \
  --path "${MODEL}" \
  --task translation_multi_simple_epoch \
  --lang-dict "${LANG_DICT}" \
  --gen-subset "${SPLIT}" \
  --bpe 'sentencepiece' \
  --lang-pairs "${source_lang}-${target_lang}" \
  -s "${source_lang}" -t "${target_lang}" \
  --sentencepiece-model "${SPM}" \
  --remove-bpe 'sentencepiece' \
  --beam 1 \
  --lang-tok-style mbart \
  --encoder-save-dir "${ENCODER_SAVE_DIR}/${source_lang}"

# Save encoder outputs for target sentences
# (same input directory, with the pair direction and -s/-t swapped so the
# target language is passed as the source side).
python "${SAVE_ENCODER}" \
  "${INPUT_DIR}" \
  --path "${MODEL}" \
  --lang-dict "${LANG_DICT}" \
  --task translation_multi_simple_epoch \
  --gen-subset "${SPLIT}" \
  --bpe 'sentencepiece' \
  --lang-pairs "${target_lang}-${source_lang}" \
  -t "${source_lang}" -s "${target_lang}" \
  --sentencepiece-model "${SPM}" \
  --remove-bpe 'sentencepiece' \
  --beam 1 \
  --lang-tok-style mbart \
  --encoder-save-dir "${ENCODER_SAVE_DIR}/${target_lang}"

# Analyze sentence retrieval accuracy
python "${ENCODER_ANALYSIS}" --langs "${source_lang},${target_lang}" "${ENCODER_SAVE_DIR}"