#!/usr/bin/env bash
################################################################################
# Shell script that starts a vLLM server with a base model plus all of the
# LoRA adapters available in this repository.
#
# To run this script:
# 1. Install an appropriate build of vLLM for your machine (`pip install vllm`)
# 2. Install the Hugging Face CLI (`pip install -U "huggingface_hub[cli]"`)
# 3. Download the intrinsics library by running:
#      hf download ibm-granite/rag-intrinsics-lib --local-dir ./rag-intrinsics-lib
# 4. Edit the constants BASE_MODEL_NAME, BASE_MODEL_ORG, and PORT as needed
# 5. Run this script from the root of your local copy of rag-intrinsics-lib.
################################################################################
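# The adapter-discovery loop below expects one directory per adapter, each
# containing a lora/<BASE_MODEL_NAME> subdirectory with the adapter weights.
# Illustrative layout (the adapter name here is a hypothetical example):
#
#   ./answerability/lora/granite-3.3-8b-instruct/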
# Base model to serve and the Hugging Face organization that publishes it.
BASE_MODEL_NAME=granite-3.3-8b-instruct
BASE_MODEL_ORG=ibm-granite
# Port on which vLLM's OpenAI-compatible REST API will listen.
PORT=55555
# vLLM reads this variable at startup; clients must then present the same
# value as a Bearer token (see the example request at the end of this script).
export VLLM_API_KEY=rag_intrinsics_1234
# Find all LoRA adapters for the target base model.
LORAS=""
for item in ./*; do
    # Strip the leading "./" from the directory name.
    name=$(basename -- "${item}")
    if [ -d "./${name}/lora/${BASE_MODEL_NAME}" ]; then
        LORAS+="${name}=./${name}/lora/${BASE_MODEL_NAME} "
    fi
done
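# For illustration, with hypothetical adapters "answerability" and
# "query_rewrite" present alongside this script, LORAS would now hold:
#   "answerability=./answerability/lora/granite-3.3-8b-instruct
#    query_rewrite=./query_rewrite/lora/granite-3.3-8b-instruct "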
| CMD="vllm serve ${BASE_MODEL_ORG}/${BASE_MODEL_NAME} \ | |
| --port ${PORT} \ | |
| --gpu-memory-utilization 0.45 \ | |
| --max-model-len 8192 \ | |
| --enable-lora \ | |
| --max_lora_rank 64 \ | |
| --lora-modules $LORAS" | |
| echo $CMD | |
| $CMD | |
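# Once the server is ready, each adapter is addressable by name through
# vLLM's OpenAI-compatible API, alongside the base model. A minimal smoke
# test, assuming a hypothetical adapter named "answerability" was picked up
# by the loop above (port and API key match the constants defined earlier):
#
#   curl http://localhost:55555/v1/chat/completions \
#     -H "Authorization: Bearer rag_intrinsics_1234" \
#     -H "Content-Type: application/json" \
#     -d '{"model": "answerability",
#          "messages": [{"role": "user", "content": "Is this answerable?"}]}'
#
# Listing http://localhost:55555/v1/models shows the base model plus every
# registered LoRA adapter.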