#/bin/bash root_dir=$(pwd) echo "Setting up the environment in the $root_dir" # -------------------------------------------------------------- # create and activate the virtual environment # -------------------------------------------------------------- echo "Creating a virtual environment with python3" conda create -n itv2_hf python=3.9 -y conda activate itv2_hf echo "Installing all the dependencies" conda install pip python3 -m pip install --upgrade pip # -------------------------------------------------------------- # PyTorch Installation # -------------------------------------------------------------- python3 -m pip install torch --extra-index-url https://download.pytorch.org/whl/cu118 # -------------------------------------------------------------- # Install IndicNLP library and necessary resources # -------------------------------------------------------------- git clone https://github.com/anoopkunchukuttan/indic_nlp_resources.git export INDIC_RESOURCES_PATH=$root_dir/indic_nlp_resources # we use version 0.92 which is the latest in the github repo git clone https://github.com/anoopkunchukuttan/indic_nlp_library.git cd indic_nlp_library python3 -m pip install ./ cd $root_dir # -------------------------------------------------------------- # Install additional utility packages # -------------------------------------------------------------- python3 -m pip install sacremoses pandas regex mock transformers==4.33.2 urduhack[tf] mosestokenizer python3 -c "import urduhack; urduhack.download()" python3 -m pip install bitsandbytes scipy accelerate datasets # -------------------------------------------------------------- # Sentencepiece for tokenization # -------------------------------------------------------------- # build the cpp binaries from the source repo in order to use the command line utility # source repo: https://github.com/google/sentencepiece python3 -m pip install sentencepiece echo "Setup completed!"