iblfe's picture
Upload folder using huggingface_hub
b585c7f verified
#!/bin/bash
# Polls the completions endpoint on localhost:5000 until it answers a small
# test request with HTTP 200, then starts h2oGPT's generate.py pointed at that
# server (passed as a "vllm" inference server).
#
# The script retries every 5 seconds, indefinitely, until the endpoint is ready.
# Once generate.py exits, the loop ends; under `set -e` a failing generate.py
# makes the script exit with that failure status.

# Enable errexit via `set` rather than the shebang so it is also active when
# the script is invoked as `bash script.sh`.
set -e

while true; do
  # While the server is not up yet curl exits non-zero (e.g. connection
  # refused) and prints "000" as the status code. `|| true` keeps errexit from
  # aborting the script on that expected failure so the retry branch can run.
  http_code=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:5000/v1/completions \
    -H "Content-Type: application/json" \
    -d '{
      "model": "h2oai/h2ogpt-4096-llama2-13b-chat",
      "prompt": "San Francisco is a",
      "max_tokens": 7,
      "temperature": 0
    }') || true

  # String comparison: an empty or non-numeric value simply falls through to
  # the retry branch instead of triggering an "integer expression" error.
  if [[ "$http_code" == "200" ]]; then
    echo "Received HTTP 200 status code. Starting h2ogpt service"

    # Expose only the upper half of the GPUs listed by nvidia-smi
    # (indices count/2 .. count-1) to the h2oGPT process.
    # NOTE(review): presumably the lower half is used by the vLLM server -
    # confirm against the vLLM launch script.
    gpu_count=$(nvidia-smi -L | wc -l)
    gpu_ids=$(seq -s, "$((gpu_count / 2))" "$((gpu_count - 1))")

    CUDA_VISIBLE_DEVICES="$gpu_ids" /h2ogpt_conda/bin/python3.10 \
      /workspace/generate.py \
      --inference_server="vllm:0.0.0.0:5000" \
      --base_model=h2oai/h2ogpt-4096-llama2-13b-chat \
      --langchain_mode=UserData
    break
  else
    echo "Received HTTP $http_code status code. Retrying in 5 seconds..."
    sleep 5
  fi
done