# nlp_use_case / serve.yaml
# Task name (optional), used for display purposes.
name: nlp_use_case
# Working directory (optional), synced to ~/sky_workdir on the remote cluster
# each time launch or exec is run with the yaml file.
#
# Commands in "setup" and "run" will be executed under it.
#
# If a .gitignore file (or a .git/info/exclude file) exists in the working
# directory, files and directories listed in it will be excluded from syncing.
workdir: ./gradio
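# The "run" section below references $MODEL_NAME and $SKYPILOT_NUM_GPUS_PER_NODE.
# SKYPILOT_NUM_GPUS_PER_NODE is injected by SkyPilot at runtime, but MODEL_NAME is
# not defined anywhere in this file and must be supplied. A minimal sketch of the
# sections a task like this would typically declare (the model id and accelerator
# spec below are placeholders, not taken from the original file):
#
# envs:
#   MODEL_NAME: your-org/your-model   # placeholder Hugging Face model id
#
# resources:
#   accelerators: A100:1              # placeholder accelerator spec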
setup: |
  echo "Begin setup."
  pip install -r requirements.txt
  echo "Setup complete."
run: |
  conda activate vllm
  echo 'Starting vllm api server...'
  python -u app.py \
    --tokenizer_name $MODEL_NAME \
    --tensor-parallel-size $SKYPILOT_NUM_GPUS_PER_NODE \
    --tokenizer hf-internal-testing/llama-tokenizer 2>&1 | tee api_server.log &
  echo 'Waiting for vllm api server to start...'
  while ! grep -q 'Uvicorn running on' api_server.log; do sleep 1; done
  echo 'Starting gradio server...'
  python vllm/examples/gradio_webserver.py
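# Usage sketch (assumptions: SkyPilot is installed and configured, and the
# placeholder model id is replaced with a real one):
#
#   sky launch -c nlp-serve serve.yaml --env MODEL_NAME=your-org/your-model
#
# The Gradio web UI is started by the last command in "run" once the API server
# reports that Uvicorn is running.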