Spaces:
Sleeping
Sleeping
# Task name (optional), used for display purposes.
name: nlp_use_case
# Working directory (optional), synced to ~/sky_workdir on the remote cluster
# each time `sky launch` or `sky exec` is run with this YAML file.
#
# Commands in "setup" and "run" are executed under it.
#
# If a .gitignore file (or a .git/info/exclude file) exists in the working
# directory, files and directories listed in it are excluded from syncing.
workdir: ./gradio
# Setup commands, executed under the synced working directory.
# Installs the Python dependencies listed in requirements.txt.
# NOTE(review): "run" activates a conda environment named `vllm`, but nothing
# here creates it or installs into it - confirm the environment already exists
# on the target image, or create/activate it before the pip install.
setup: |
  echo "Begin setup."
  pip install -r requirements.txt
  echo "Setup complete."
# Run commands, executed under the synced working directory.
# Starts the API server in the background with its output mirrored to
# api_server.log, waits until that log reports that Uvicorn is listening,
# then runs the Gradio web server in the foreground.
# NOTE(review): MODEL_NAME is not defined in the visible part of this file -
# confirm it is supplied through an `envs:` section or `--env MODEL_NAME=...`.
# NOTE(review): both --tokenizer_name and --tokenizer are passed to app.py -
# confirm that app.py accepts both flags.
run: |
  conda activate vllm

  echo 'Starting vllm api server...'
  # Run app.py as a script: `-m` expects a module name, not a file name.
  python -u app.py \
    --tokenizer_name "$MODEL_NAME" \
    --tensor-parallel-size "$SKYPILOT_NUM_GPUS_PER_NODE" \
    --tokenizer hf-internal-testing/llama-tokenizer 2>&1 | tee api_server.log &
  # $! is the last process of the background pipeline (tee); it exits once
  # the server closes its output, so it serves as a liveness probe.
  server_pid=$!

  echo 'Waiting for vllm api server to start...'
  until grep -q 'Uvicorn running on' api_server.log 2>/dev/null; do
    # Fail fast instead of waiting forever if the server died during startup.
    if ! kill -0 "$server_pid" 2>/dev/null; then
      echo 'vllm api server exited before becoming ready; see api_server.log' >&2
      exit 1
    fi
    sleep 1
  done

  echo 'Starting gradio server...'
  python vllm/examples/gradio_webserver.py