# Task name (optional), used for display purposes.
name: nlp_use_case

# Working directory (optional), synced to ~/sky_workdir on the remote cluster
# each time launch or exec is run with the yaml file.
#
# Commands in "setup" and "run" will be executed under it.
#
# If a .gitignore file (or a .git/info/exclude file) exists in the working
# directory, files and directories listed in it will be excluded from syncing.
workdir: ./gradio

# Installs the Python dependencies listed in requirements.txt, resolved
# relative to the working directory.
setup: |
  echo "Begin setup."
  pip install -r requirements.txt
  echo "Setup complete."

# Starts the API server in the background, waits until its log reports that
# Uvicorn is running, then starts the gradio web server in the foreground.
run: |
  # MODEL_NAME is not defined anywhere in this file, so it has to be supplied
  # by the caller (e.g. `sky launch --env MODEL_NAME=<model>`). Fail fast with
  # a clear message instead of passing an empty argument to the server.
  : "${MODEL_NAME:?MODEL_NAME must be set, e.g. sky launch --env MODEL_NAME=<model>}"

  # NOTE(review): setup above does not create a conda env named "vllm";
  # presumably the image provides it -- confirm.
  conda activate vllm

  # The work directory persists between runs; drop any old log so the
  # readiness check below cannot match output from a previous server.
  rm -f api_server.log

  echo 'Starting vllm api server...'
  # Run app.py as a script: `-m` expects a module name, not a file path.
  # NOTE(review): the flag names are kept as written (only the malformed
  # `----` prefix was corrected); verify them against app.py's parser.
  python -u app.py \
    --tokenizer_name "$MODEL_NAME" \
    --tensor-parallel-size "$SKYPILOT_NUM_GPUS_PER_NODE" \
    --tokenizer hf-internal-testing/llama-tokenizer 2>&1 | tee api_server.log &
  # $! is the PID of the last pipeline stage (tee), which exits once the
  # server closes its output, so it doubles as a liveness probe.
  server_pid=$!

  echo 'Waiting for vllm api server to start...'
  until grep -q 'Uvicorn running on' api_server.log 2>/dev/null; do
    # Stop waiting if the server died during startup instead of looping
    # forever on a log line that will never appear.
    if ! kill -0 "$server_pid" 2>/dev/null; then
      echo 'vllm api server exited before startup completed; see api_server.log' >&2
      exit 1
    fi
    sleep 1
  done

  echo 'Starting gradio server...'
  # NOTE(review): assumes a vllm checkout exists under the working directory;
  # nothing in this file creates it -- confirm.
  python vllm/examples/gradio_webserver.py