Spaces:
Running
Running
# Copyright (c) Facebook, Inc. and its affiliates. | |
# All rights reserved. | |
# | |
# This source code is licensed under the license found in the | |
# LICENSE file in the root directory of this source tree. | |
if [ -z $WORKDIR_ROOT ] ; | |
then | |
echo "please specify your working directory root in environment variable WORKDIR_ROOT. Exitting..." | |
exit | |
fi | |
if [ -z $SPM_PATH ] ; | |
then | |
echo "Please install sentence piecence from https://github.com/google/sentencepiece and set SPM_PATH pointing to the installed spm_encode.py. Exitting..." | |
exit | |
fi | |
ML50=${WORKDIR_ROOT}/ML50 | |
mkdir -p $ML50/dedup | |
mkdir -p $ML50/cleaned_dedup | |
python ./dedup_all.py --from-folder $ML50/raw --to-folder $ML50/dedup | |
python ./remove_valid_test_in_train.py --from-folder $ML50/dedup --to-folder $ML50/clean | |
python ./binarize.py --raw-folder $ML50/clean |