# Optional task name, used for display purposes.
name: nlp_use_case

# Working directory (optional), synced to ~/sky_workdir on the remote cluster
# each time launch or exec is run with the yaml file.
#
# Commands in "setup" and "run" are executed under it, so relative paths used
# in those sections (for example requirements.txt) resolve against this
# directory.
#
# If a .gitignore file (or a .git/info/exclude file) exists in the working
# directory, files and directories listed in it will be excluded from syncing.
workdir: ./gradio

# Installs the Python dependencies listed in requirements.txt.
#
# The "run" section activates a conda environment named "vllm" before starting
# the servers, so the dependencies are installed into that same environment
# rather than into whichever environment happens to be active by default.
setup: |
  echo "Begin setup."
  # Reuse the "vllm" conda environment when it already exists; otherwise
  # create it first. The Python version is a default and can be adjusted.
  if ! conda activate vllm 2>/dev/null; then
    conda create -n vllm python=3.10 -y
    conda activate vllm
  fi
  pip install -r requirements.txt
  echo "Setup complete."

# Starts the API server (app.py) in the background with its output mirrored to
# api_server.log, waits until that log reports "Uvicorn running on", and then
# starts the Gradio web server in the foreground.
#
# MODEL_NAME must be provided through the task environment;
# SKYPILOT_NUM_GPUS_PER_NODE is read from the environment as well.
run: |
  conda activate vllm

  # Fail fast with a clear message instead of passing an empty model name.
  if [ -z "${MODEL_NAME:-}" ]; then
    echo 'MODEL_NAME is not set; define it in the task environment.' >&2
    exit 1
  fi

  echo 'Starting vllm api server...'
  # Drop any log left over from a previous run so the readiness check below
  # cannot match stale output.
  rm -f api_server.log
  # app.py is a script file, so it is run directly: "python -m" expects a
  # module name and rejects "app.py".
  python -u app.py \
                   --tokenizer_name "$MODEL_NAME" \
                   --tensor-parallel-size "$SKYPILOT_NUM_GPUS_PER_NODE" \
                   --tokenizer hf-internal-testing/llama-tokenizer 2>&1 | tee api_server.log &
  # $! is the PID of the last command of the background pipeline (tee), which
  # exits once the server closes its output, i.e. when the server terminates.
  server_pipeline_pid=$!

  echo 'Waiting for vllm api server to start...'
  until grep -q 'Uvicorn running on' api_server.log 2>/dev/null; do
    # Stop waiting if the server died before it ever became ready.
    if ! kill -0 "$server_pipeline_pid" 2>/dev/null; then
      echo 'vllm api server exited before becoming ready; see api_server.log.' >&2
      exit 1
    fi
    sleep 1
  done

  echo 'Starting gradio server...'
  # NOTE(review): this path is resolved relative to the synced workdir;
  # confirm that vllm/examples/gradio_webserver.py exists there.
  python vllm/examples/gradio_webserver.py