#!/usr/bin/env bash CUDA_VISIBLE_DEVICES="0" torchrun run_model.py --pipeline-model-parallel-size 1 --tensor-model-parallel-size 1