# --- Virtual environment ---
# System prerequisites: Python headers for building C extensions, venv support,
# and ffmpeg with extra codecs for audio decoding.
# -y answers the install prompt so the commands also work non-interactively.
apt install -y python3-dev python3-venv
apt install -y ffmpeg libavcodec-extra
# Create and activate an isolated environment (requires a python3.10 binary on PATH).
python3.10 -m venv venv && . venv/bin/activate
# CUDA 11.8 builds of the PyTorch stack from the official wheel index.
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
# Build prerequisites for the from-source NeMo install below.
pip install -U wheel
pip install Cython
# NeMo pinned to a known-good commit for reproducibility.
pip install git+https://github.com/NVIDIA/NeMo.git@1fa961ba03ab5f8c91b278640e29807079373372
# Diarization/VAD dependency used by the long-form inference scripts.
pip install pyannote.audio==3.2.0
# Download model weights, configs, tokenizer, and example audio into ./data,
# then unpack the tokenizer archive and remove it.
# -p: succeed even if ./data already exists (plain mkdir would fail on rerun).
mkdir -p ./data
wget https://n-ws-q0bez.s3pd12.sbercloud.ru/b-ws-q0bez-jpv/GigaAM/{ssl_model_weights.ckpt,emo_model_weights.ckpt,ctc_model_weights.ckpt,rnnt_model_weights.ckpt,ctc_model_config.yaml,emo_model_config.yaml,encoder_config.yaml,rnnt_model_config.yaml,tokenizer_all_sets.tar,example.wav,long_example.wav} -P ./data \
  && tar -xf ./data/tokenizer_all_sets.tar --directory ./data/ \
  && rm ./data/tokenizer_all_sets.tar
# Self-supervised (SSL) encoder inference on the short example clip.
python ssl_inference.py \
  --encoder_config ./data/encoder_config.yaml \
  --model_weights ./data/ssl_model_weights.ckpt \
  --audio_path ./data/example.wav \
  --device cuda
# CTC ASR inference on the short example clip.
python ctc_inference.py \
  --model_config ./data/ctc_model_config.yaml \
  --model_weights ./data/ctc_model_weights.ckpt \
  --audio_path ./data/example.wav \
  --device cuda
# Long-form CTC inference; pyannote diarization requires a Hugging Face token.
# Read the token from the HF_TOKEN env var: ${HF_TOKEN:?…} aborts with a message
# when unset/empty, and the literal "<YOUR_HF_TOKEN>" placeholder was not valid
# shell anyway (< and > parse as redirections).
python ctc_longform_inference.py --model_config ./data/ctc_model_config.yaml \
  --model_weights ./data/ctc_model_weights.ckpt --device cuda \
  --audio_path ./data/long_example.wav --hf_token "${HF_TOKEN:?set HF_TOKEN to your Hugging Face access token}"
# RNN-T ASR inference on the short example clip.
python rnnt_inference.py \
  --model_config ./data/rnnt_model_config.yaml \
  --model_weights ./data/rnnt_model_weights.ckpt \
  --tokenizer_path ./data/tokenizer_all_sets \
  --audio_path ./data/example.wav \
  --device cuda
# Long-form RNN-T inference; pyannote diarization requires a Hugging Face token.
# Read the token from the HF_TOKEN env var: ${HF_TOKEN:?…} aborts with a message
# when unset/empty, and the literal "<YOUR_HF_TOKEN>" placeholder was not valid
# shell anyway (< and > parse as redirections).
python rnnt_longform_inference.py --model_config ./data/rnnt_model_config.yaml \
  --model_weights ./data/rnnt_model_weights.ckpt --tokenizer_path ./data/tokenizer_all_sets \
  --device cuda --audio_path ./data/long_example.wav --hf_token "${HF_TOKEN:?set HF_TOKEN to your Hugging Face access token}"
# Emotion-recognition inference on the short example clip.
python emo_inference.py \
  --model_config ./data/emo_model_config.yaml \
  --model_weights ./data/emo_model_weights.ckpt \
  --audio_path ./data/example.wav \
  --device cuda
# --- Docker ---
# Build the inference image from the Dockerfile in the current directory.
docker build -t gigaam_image .
# SSL inference inside the container; bind-mount the repo at /workspace/gigaam.
# Quote "$PWD" so the -v mount spec survives paths containing spaces (SC2086).
# NOTE(review): model paths read /workspace/data while the mount targets
# /workspace/gigaam — presumably the image bundles ./data at /workspace/data;
# verify against the Dockerfile.
docker run -v "$PWD:/workspace/gigaam" --gpus all gigaam_image \
  python /workspace/gigaam/ssl_inference.py --encoder_config /workspace/data/encoder_config.yaml \
  --model_weights /workspace/data/ssl_model_weights.ckpt \
  --device cuda --audio_path /workspace/data/example.wav
# CTC inference inside the container; bind-mount the repo at /workspace/gigaam.
# Quote "$PWD" so the -v mount spec survives paths containing spaces (SC2086).
docker run -v "$PWD:/workspace/gigaam" --gpus all gigaam_image \
  python /workspace/gigaam/ctc_inference.py --model_config /workspace/data/ctc_model_config.yaml \
  --model_weights /workspace/data/ctc_model_weights.ckpt \
  --device cuda --audio_path /workspace/data/example.wav
# Long-form CTC inference inside the container.
# Quote "$PWD" so the -v mount spec survives paths containing spaces (SC2086).
# The Hugging Face token comes from the HF_TOKEN env var; ${HF_TOKEN:?…} aborts
# with a message when unset/empty ("<YOUR_HF_TOKEN>" was not valid shell).
docker run -v "$PWD:/workspace/gigaam" --gpus all gigaam_image \
  python /workspace/gigaam/ctc_longform_inference.py --model_config /workspace/data/ctc_model_config.yaml \
  --model_weights /workspace/data/ctc_model_weights.ckpt --device cuda \
  --audio_path /workspace/data/long_example.wav --hf_token "${HF_TOKEN:?set HF_TOKEN to your Hugging Face access token}"
# RNN-T inference inside the container; bind-mount the repo at /workspace/gigaam.
# Quote "$PWD" so the -v mount spec survives paths containing spaces (SC2086).
docker run -v "$PWD:/workspace/gigaam" --gpus all gigaam_image \
  python /workspace/gigaam/rnnt_inference.py --model_config /workspace/data/rnnt_model_config.yaml \
  --model_weights /workspace/data/rnnt_model_weights.ckpt --tokenizer_path /workspace/data/tokenizer_all_sets \
  --device cuda --audio_path /workspace/data/example.wav
# Long-form RNN-T inference inside the container.
# Quote "$PWD" so the -v mount spec survives paths containing spaces (SC2086).
# The Hugging Face token comes from the HF_TOKEN env var; ${HF_TOKEN:?…} aborts
# with a message when unset/empty ("<YOUR_HF_TOKEN>" was not valid shell).
docker run -v "$PWD:/workspace/gigaam" --gpus all gigaam_image \
  python /workspace/gigaam/rnnt_longform_inference.py --model_config /workspace/data/rnnt_model_config.yaml \
  --model_weights /workspace/data/rnnt_model_weights.ckpt --tokenizer_path /workspace/data/tokenizer_all_sets \
  --device cuda --audio_path /workspace/data/long_example.wav --hf_token "${HF_TOKEN:?set HF_TOKEN to your Hugging Face access token}"
# Emotion-recognition inference inside the container.
# Quote "$PWD" so the -v mount spec survives paths containing spaces (SC2086).
docker run -v "$PWD:/workspace/gigaam" --gpus all gigaam_image \
  python /workspace/gigaam/emo_inference.py --model_config /workspace/data/emo_model_config.yaml \
  --model_weights /workspace/data/emo_model_weights.ckpt \
  --device cuda --audio_path /workspace/data/example.wav