#!/bin/bash
#
# Launch script: starts text-generation-launcher in the background, waits for
# its /health endpoint on port 8080, starts the Gradio app, and then exits
# with the status of whichever of the two processes terminates first.
#
# Required environment variables:
#   MODEL_NAME               value passed to --model-id
#   MAX_CONCURRENT_REQUESTS  value passed to --max-concurrent-requests
#   MAX_INPUT_LENGTH         value passed to --max-input-length
#   MAX_TOTAL_TOKENS         value passed to --max-total-tokens
#   GRADIO_PORT              value passed to app/main.py --port
# Optional environment variables:
#   QUANTIZATION             "false" (or unset/empty) disables quantization;
#                            any other value is passed to --quantize

set -u

# Fail fast with a clear message instead of handing the launcher an option
# that is missing its argument.
: "${MODEL_NAME:?MODEL_NAME must be set}"
: "${MAX_CONCURRENT_REQUESTS:?MAX_CONCURRENT_REQUESTS must be set}"
: "${MAX_INPUT_LENGTH:?MAX_INPUT_LENGTH must be set}"
: "${MAX_TOTAL_TOKENS:?MAX_TOTAL_TOKENS must be set}"
: "${GRADIO_PORT:?GRADIO_PORT must be set}"

# Build the launcher command line once; an array keeps every value a single
# word even if it contains spaces or glob characters.
launcher_args=(
  --model-id "$MODEL_NAME"
  --num-shard 1
  --port 8080
  --trust-remote-code
  --max-concurrent-requests "$MAX_CONCURRENT_REQUESTS"
  --max-input-length "$MAX_INPUT_LENGTH"
  --max-total-tokens "$MAX_TOTAL_TOKENS"
)

# An unset or empty QUANTIZATION is treated like "false" so that --quantize is
# never emitted without a value.
quantization="${QUANTIZATION:-false}"
if [[ "$quantization" != "false" ]]; then
  launcher_args+=(--quantize "$quantization")
fi

text-generation-launcher "${launcher_args[@]}" &
launcher_pid=$!

# Wait for text-generation-inference to start. --fail makes curl return a
# non-zero status on an HTTP error response, so the result can be trusted.
if ! curl --fail --retry 60 --retry-delay 10 --retry-connrefused \
  http://127.0.0.1:8080/health; then
  printf '%s\n' "text-generation-launcher did not become healthy; aborting" >&2
  kill "$launcher_pid" 2>/dev/null
  exit 1
fi

# Start the gradio
python app/main.py --port "$GRADIO_PORT" &
gradio_pid=$!

# Wait for any process to exit and remember its status before running
# anything else that would overwrite $?.
wait -n
exit_status=$?

# Stop whichever process is still running so nothing is left orphaned; the
# kill of the already-exited process fails harmlessly.
kill "$launcher_pid" "$gradio_pid" 2>/dev/null

# Exit with status of process that exited first
exit "$exit_status"