test-docker / entrypoint.sh
sofianhw's picture
add api server and openapi
690332d
raw
history blame
1.26 kB
#!/bin/bash

#######################################
# Log in to Hugging Face when a token is provided.
#
# The token is handed to Python through the environment rather than being
# pasted into the `-c` source text: a token containing a quote or backslash
# can no longer break or alter the Python code, and the secret does not
# appear in the python3 argument list.
#
# Globals:   HF_TOKEN (read)
# Arguments: none
# Outputs:   one status line on stdout
# Returns:   status of the python3 login command, or of echo when skipped
#######################################
hf_login() {
  if [[ -n "${HF_TOKEN}" ]]; then
    echo "The HF_TOKEN environment variable set, logging to Hugging Face."
    # The assignment prefix puts HF_TOKEN in python3's environment even if
    # the variable was set in the shell without being exported.
    HF_TOKEN="${HF_TOKEN}" python3 -c \
      'import os, huggingface_hub; huggingface_hub.login(os.environ["HF_TOKEN"])'
  else
    echo "The HF_TOKEN environment variable is not set or empty, not logging to Hugging Face."
  fi
}

# A failed login is not fatal: the script carries on either way.
hf_login
# Optional api_server.py flags are collected in an array so that each value
# taken from the environment reaches the server as exactly one argument,
# with no word splitting or glob expansion applied to it.
additional_args=()

# EXTRA_ARGS is the one variable that carries several flags in a single
# string, so it is deliberately split on whitespace. Globbing is turned off
# during the split so characters such as '*' or '?' pass through literally.
# Quotes inside EXTRA_ARGS are not interpreted.
if [[ -n "${EXTRA_ARGS:-}" ]]; then
  set -f
  # shellcheck disable=SC2206  # word splitting is intended here
  additional_args=(${EXTRA_ARGS})
  set +f
fi

# QUANTIZATION is only accepted together with an explicit DTYPE.
if [[ -n "${QUANTIZATION}" ]]; then
  if [[ -z "${DTYPE}" ]]; then
    echo "Missing required environment variable DTYPE when QUANTIZATION is set" >&2
    exit 1
  fi
  additional_args+=(-q "${QUANTIZATION}" --dtype "${DTYPE}")
fi

if [[ -n "${GPU_MEMORY_UTILIZATION}" ]]; then
  additional_args+=(--gpu-memory-utilization "${GPU_MEMORY_UTILIZATION}")
fi

if [[ -n "${MAX_MODEL_LEN}" ]]; then
  additional_args+=(--max-model-len "${MAX_MODEL_LEN}")
fi

if [[ -n "${ROOT_PATH}" ]]; then
  additional_args+=(--root-path "${ROOT_PATH}")
fi

# Alternative command, kept for reference: runs the
# vllm.entrypoints.openai.api_server module instead of the local
# api_server.py.
# exec python3 -u -m vllm.entrypoints.openai.api_server \
#   --model "${HF_MODEL}" \
#   --host 0.0.0.0 \
#   --port 7860 \
#   "${additional_args[@]}"

# Replace this shell with the server process (-u: unbuffered Python output),
# listening on all interfaces on port 7860.
exec python3 -u api_server.py \
  --model "${HF_MODEL}" \
  --host 0.0.0.0 \
  --port 7860 \
  "${additional_args[@]}"