Spaces:

chansung
/

gradio_together_tgi

Runtime error

File size: 985 Bytes

433b752
 
47200c8
 
 
8c02dd9
6892c67
 
73f0ed3
8385e49
 
6892c67
29c6e96
6892c67
 
73f0ed3
8385e49
 
6892c67
 
bac467e
433b752
 
e11772a
433b752
6dee22b
f28f0b8
433b752

#!/bin/bash
#
# Launch script: starts text-generation-inference (TGI) on port 8080, waits
# for its /health endpoint to respond, then starts the Gradio app. The script
# exits with the status of whichever of the two processes exits first.
#
# Environment variables read:
#   HUGGING_FACE_HUB_TOKEN   re-exported, and mirrored into HF_TOKEN
#   MODEL_NAME               value for --model-id (required)
#   MAX_CONCURRENT_REQUESTS  value for --max-concurrent-requests (required)
#   MAX_INPUT_LENGTH         value for --max-input-length (required)
#   MAX_TOTAL_TOKENS         value for --max-total-tokens (required)
#   QUANTIZATION             "false", empty or unset: no --quantize flag;
#                            any other value is passed to --quantize
#   GRADIO_PORT              value for the app's --port (required)

readonly TGI_HEALTH_URL="http://127.0.0.1:8080/health"
readonly HEALTH_RETRIES=60   # retries after the first attempt
readonly HEALTH_DELAY=10     # seconds between attempts

tgi_pid=""
gradio_pid=""

# Print a message to stderr and exit non-zero.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Best-effort termination of any child that is still running, so that one
# process exiting does not leave the other one orphaned.
cleanup() {
  local pid
  for pid in "$tgi_pid" "$gradio_pid"; do
    if [[ -n "$pid" ]]; then
      kill "$pid" 2>/dev/null
    fi
  done
}
trap cleanup EXIT

export HUGGING_FACE_HUB_TOKEN="${HUGGING_FACE_HUB_TOKEN:-}"
export HF_TOKEN="$HUGGING_FACE_HUB_TOKEN"

# Fail fast with a clear message instead of building a malformed command line
# (an unset value would make the next flag be parsed as the option's argument).
: "${MODEL_NAME:?MODEL_NAME must be set}"
: "${MAX_CONCURRENT_REQUESTS:?MAX_CONCURRENT_REQUESTS must be set}"
: "${MAX_INPUT_LENGTH:?MAX_INPUT_LENGTH must be set}"
: "${MAX_TOTAL_TOKENS:?MAX_TOTAL_TOKENS must be set}"
: "${GRADIO_PORT:?GRADIO_PORT must be set}"

launcher_args=(
  --model-id "$MODEL_NAME"
  --num-shard 1 --port 8080 --trust-remote-code
  --max-concurrent-requests "$MAX_CONCURRENT_REQUESTS"
  --max-input-length "$MAX_INPUT_LENGTH"
  --max-total-tokens "$MAX_TOTAL_TOKENS"
)

# Only add --quantize when a real value was supplied; an empty or unset
# QUANTIZATION would otherwise produce a bare "--quantize" with no argument.
if [[ -n "${QUANTIZATION:-}" && "$QUANTIZATION" != "false" ]]; then
  launcher_args+=(--quantize "$QUANTIZATION")
fi

text-generation-launcher "${launcher_args[@]}" &
tgi_pid=$!

# Wait for text-generation-inference to start. Poll the health endpoint, but
# stop immediately if the launcher process has already died rather than
# retrying against a server that will never come up.
healthy=0
for ((attempt = 0; attempt <= HEALTH_RETRIES; attempt++)); do
  if curl --fail --silent --output /dev/null "$TGI_HEALTH_URL"; then
    healthy=1
    break
  fi
  if ! kill -0 "$tgi_pid" 2>/dev/null; then
    wait "$tgi_pid"
    status=$?
    printf 'text-generation-launcher exited before becoming healthy (status %s)\n' \
      "$status" >&2
    # Never report success when the server failed to come up.
    if ((status == 0)); then
      status=1
    fi
    exit "$status"
  fi
  sleep "$HEALTH_DELAY"
done

if ((healthy == 0)); then
  die "text-generation-inference did not become healthy at $TGI_HEALTH_URL"
fi

# Start the gradio
python app/main.py --port "$GRADIO_PORT" &
gradio_pid=$!

# Wait for any process to exit
wait -n
status=$?

# Exit with status of process that exited first (the EXIT trap stops the other)
exit "$status"