File size: 758 Bytes
433b752 8c02dd9 6892c67 73f0ed3 8385e49 6892c67 29c6e96 67e76e6 a030a88 73f0ed3 67e76e6 6892c67 bac467e 433b752 67e76e6 e11772a 433b752 6dee22b c223384 433b752 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 |
#!/bin/bash
# Entrypoint: start a model server on port 8080 in the background, wait for its
# /health endpoint, start the Gradio app, then exit as soon as either
# background process exits.
#
# Environment variables read by this script:
#   QUANTIZATION             "false" starts text-generation-launcher; any other
#                            value starts text-embeddings-router
#   MODEL_NAME               passed as --model-id to the selected server
#   MAX_CONCURRENT_REQUESTS  passed as --max-concurrent-requests
#   MAX_INPUT_LENGTH         passed as --max-input-length (launcher branch only)
#   MAX_TOTAL_TOKENS         passed as --max-total-tokens (launcher branch only)
#   DTYPE                    passed as --dtype (router branch only)
#   GRADIO_PORT              passed as --port to app/main.py

# All expansions are quoted so a value containing whitespace or glob
# characters is still handed over as exactly one argument.
if [[ "$QUANTIZATION" == "false" ]]; then
  text-generation-launcher --model-id "$MODEL_NAME" \
    --num-shard 1 --port 8080 --trust-remote-code \
    --max-concurrent-requests "$MAX_CONCURRENT_REQUESTS" \
    --max-input-length "$MAX_INPUT_LENGTH" \
    --max-total-tokens "$MAX_TOTAL_TOKENS" &
else
  text-embeddings-router --model-id "$MODEL_NAME" \
    --port 8080 \
    --max-concurrent-requests "$MAX_CONCURRENT_REQUESTS" \
    --dtype "$DTYPE" &
fi
# Remember which background job is the model server so its fate can be
# checked if the health wait below gives up.
backend_pid=$!

# Wait for whichever server was started to answer on its health endpoint.
if ! curl --retry 60 --retry-delay 10 --retry-connrefused http://127.0.0.1:8080/health; then
  if ! kill -0 "$backend_pid" 2>/dev/null; then
    # The server is no longer running: report its exit status now instead of
    # starting a UI that has nothing to talk to.
    wait "$backend_pid"
    exit $?
  fi
  # The server is still running but never answered; keep the previous
  # behaviour of starting the UI anyway, but make the situation visible.
  printf 'warning: health check on http://127.0.0.1:8080/health did not succeed; starting the UI anyway\n' >&2
fi

# Start the Gradio app.
python3 app/main.py --port "$GRADIO_PORT" &

# Block until the first background process exits, then exit with its status.
wait -n
exit $?