#! /bin/bash
################################################################################
# Shell script that starts a copy of Ollama with a base model plus all the
# available LoRA adapters in this repository.
#
# Converts `.safetensors` to `.gguf` for running in Ollama.
# The target environment is GitHub Actions and other CPU-only systems.
# To keep the intermediate files, remove the `rm` command at the end.
#
# To run this script:
# 1. Install an appropriate build of Ollama for your machine.
#    See https://docs.ollama.com/linux#manual-install; for example, on Linux:
#    ```
#    curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz \
#      | sudo tar zx -C /usr
#    ```
# 2. Start the Ollama server on port 55555:
#    ```
#    OLLAMA_HOST="localhost:55555" ollama serve &
#    ```
#    See https://docs.ollama.com/faq#how-do-i-configure-ollama-server for how to
#    set the port on other operating systems.
# 3. Download the intrinsics library by running:
#    hf download ibm-granite/rag-intrinsics-lib --local-dir ./rag-intrinsics-lib
# 4. Edit the constants BASE_MODEL_NAME and BASE_MODEL_ORG below as needed.
# 5. Run this script from the root of your local copy of rag-intrinsics-lib.
################################################################################
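# Optional hardening (an addition, not part of the original script): stop at the
# first failed command so that a bad download or conversion does not cascade into
# later steps. Uncomment to enable.
# set -euo pipefail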
echo "Setup"
OLLAMA_HOST="localhost:55555"
OLLAMA_DIR="_ollama"
mkdir -p $OLLAMA_DIR
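# Optional sanity check (an assumption about the intended setup, not part of the
# original script): make sure an Ollama server is already listening on
# $OLLAMA_HOST before downloading and converting anything; `ollama list` fails
# if it cannot reach the server.
OLLAMA_HOST=$OLLAMA_HOST ollama list > /dev/null \
  || { echo "No Ollama server reachable at $OLLAMA_HOST"; exit 1; }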
echo ""
echo "Download base model"
MODEL_DIR="$OLLAMA_DIR/models"
mkdir -p $MODEL_DIR
OLLAMA_MODEL_NAME=granite3.3:8b # Quantized model on Ollama
BASE_MODEL_NAME=granite-3.3-8b-instruct
BASE_MODEL_NAME_UPPER=granite-3.3-8B-instruct # llama.cpp output
BASE_MODEL_ORG=ibm-granite
pip install huggingface_hub  # provides the `hf` CLI used below
hf download $BASE_MODEL_ORG/$BASE_MODEL_NAME --local-dir $MODEL_DIR/$BASE_MODEL_NAME
echo ""
echo "Clone llama.cpp and install dependencies"
LLAMA_CPP_FOLDER_NAME="$OLLAMA_DIR/llama.cpp"
git clone --single-branch --branch master https://github.com/ggml-org/llama.cpp.git $LLAMA_CPP_FOLDER_NAME
pip install -r $LLAMA_CPP_FOLDER_NAME/requirements/requirements-convert_hf_to_gguf.txt
pip install -r $LLAMA_CPP_FOLDER_NAME/requirements/requirements-convert_hf_to_gguf_update.txt
pip install -r $LLAMA_CPP_FOLDER_NAME/requirements/requirements-convert_lora_to_gguf.txt
echo ""
OUTTYPE="q8_0"
OUTTYPE_UPPER="Q8_0" # llama.cpp output
echo "Convert base model to GGUF: $OUTTYPE"
python $LLAMA_CPP_FOLDER_NAME/convert_hf_to_gguf.py $MODEL_DIR/$BASE_MODEL_NAME --outtype $OUTTYPE
MODEL_GGUF=$(realpath "$MODEL_DIR/$BASE_MODEL_NAME/$BASE_MODEL_NAME_UPPER-$OUTTYPE_UPPER.gguf")
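# Optional check (an addition for robustness): confirm that the conversion above
# produced the expected GGUF file; the file name is derived from how llama.cpp
# names its output and may change across llama.cpp versions.
if [ ! -f "$MODEL_GGUF" ]; then
    echo "Expected base model GGUF not found: $MODEL_GGUF"
    exit 1
fi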
echo ""
echo "Convert LoRA adapters to GGUF"
# Find every LoRA adapter directory for the chosen base model (paths look like
# <intrinsic>/lora/<base model>); "cut -c 3-" strips the leading "./".
LORA_DIRS=$( find . -name "$BASE_MODEL_NAME" -path "*/lora/$BASE_MODEL_NAME*" | sort | cut -c 3- )
for LORA_DIR in $LORA_DIRS; do
LORA_GGUF="$LORA_DIR/$BASE_MODEL_NAME_UPPER-$OUTTYPE_UPPER-LoRA.gguf"
# Convert only if this adapter has not already been converted.
if [ ! -f "$LORA_GGUF" ]; then
python $LLAMA_CPP_FOLDER_NAME/convert_lora_to_gguf.py $LORA_DIR --base $MODEL_DIR/$BASE_MODEL_NAME --outtype $OUTTYPE
fi
done
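# Purely informational (an addition to the original flow): list the adapter GGUF
# files that now exist, so a failed conversion is easy to spot before Modelfiles
# are created.
find . -name "*-$OUTTYPE_UPPER-LoRA.gguf" | sort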
echo ""
echo "Create Modelfiles and Ollama models"
MODELFILE_DIR="$OLLAMA_DIR/Modelfiles"
mkdir -p $MODELFILE_DIR
for LORA_DIR in $LORA_DIRS; do
LORA_GGUF=$(realpath "$LORA_DIR/$BASE_MODEL_NAME_UPPER-$OUTTYPE_UPPER-LoRA.gguf")
# Flatten the adapter path into a unique Modelfile name by replacing "/" with "_".
MODEL_FILE=$LORA_DIR
MODELFILE=${MODEL_FILE//\//_}
MODELFILE=${MODELFILE//../"Modelfile"}
MODELFILEPATH=$MODELFILE_DIR/$MODELFILE
# The first component of the adapter path is the intrinsic's name; use it as the
# name of the Ollama model.
LORA_NAME=$(echo "$LORA_DIR" | cut -d "/" -f 1)
echo ""
echo "Creating $LORA_NAME | $MODELFILEPATH"
# Option 1: build on the locally converted GGUF base model (uncomment to use).
# printf "FROM $MODEL_GGUF\nADAPTER $LORA_GGUF\n" > $MODELFILEPATH
# printf "FROM $MODEL_GGUF\nADAPTER $LORA_GGUF\n"
# Option 2 (default): build on the quantized model from the Ollama library.
printf "FROM $OLLAMA_MODEL_NAME\nADAPTER $LORA_GGUF\n" > $MODELFILEPATH
printf "FROM $OLLAMA_MODEL_NAME\nADAPTER $LORA_GGUF\n"
echo ""
OLLAMA_HOST=$OLLAMA_HOST ollama create $LORA_NAME -f $MODELFILEPATH
done
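echo ""
# Informational (an addition, not in the original script): show the models now
# registered with the Ollama server; there should be one entry per adapter
# created above.
echo "Models registered with Ollama"
OLLAMA_HOST=$OLLAMA_HOST ollama list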
echo ""
echo "Clean up"
echo "rm -rf $OLLAMA_DIR"
rm -rf $OLLAMA_DIR
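# Example follow-up (illustrative only; replace <intrinsic-name> with the name of
# one of the adapter directories in this repository):
#   OLLAMA_HOST="localhost:55555" ollama run <intrinsic-name> "your prompt here"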