File size: 836 Bytes
b585c7f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
#!/bin/bash
#
# Polls the completions endpoint on localhost:5000 with a small test request
# until it answers HTTP 200, then starts the h2oGPT generate.py front end
# pointed at that server and exits the loop once generate.py returns.
#
# errexit is enabled with `set` rather than in the shebang so that it also
# applies when the script is invoked as `bash <script>`.
set -e

readonly MODEL="h2oai/h2ogpt-4096-llama2-13b-chat"
readonly PROBE_URL="http://localhost:5000/v1/completions"
readonly RETRY_SECONDS=5

# JSON body of the probe request; built once because it never changes.
probe_payload=$(printf '{
      "model": "%s",
      "prompt": "San Francisco is a",
      "max_tokens": 7,
      "temperature": 0
    }' "$MODEL")
readonly probe_payload

while true; do
  # curl exits non-zero while the server is unreachable (it still prints 000
  # as the status code). Without the `|| true`, errexit would abort the script
  # on the first failed probe instead of retrying.
  http_code=$(curl -s -o /dev/null -w "%{http_code}" "$PROBE_URL" \
    -H "Content-Type: application/json" \
    -d "$probe_payload") || true
  # Normalise an empty result (e.g. curl could not run at all) to 000.
  http_code=${http_code:-000}

  if [[ "$http_code" == "200" ]]; then
    echo "Received HTTP 200 status code. Starting h2ogpt service"

    # Expose only the upper half of the GPUs listed by nvidia-smi:
    # indices gpu_count/2 .. gpu_count-1, comma separated.
    gpu_count=$(nvidia-smi -L | wc -l)
    gpu_ids=$(seq -s, "$((gpu_count / 2))" "$((gpu_count - 1))")

    CUDA_VISIBLE_DEVICES="$gpu_ids" /h2ogpt_conda/bin/python3.10 \
      /workspace/generate.py \
      --inference_server="vllm:0.0.0.0:5000" \
      --base_model="$MODEL" \
      --langchain_mode=UserData
    break
  else
    echo "Received HTTP $http_code status code. Retrying in $RETRY_SECONDS seconds..."
    sleep "$RETRY_SECONDS"
  fi
done