# Build quantized model files for $(MODEL_NAME).
#
# Pipeline:
#   1. download the checkpoint shards and tokenizer from Hugging Face,
#   2. download and unpack the llama.cpp source tarball for tag $(LLAMA_TAR),
#   3. convert the checkpoint to an f16 model file with llama.cpp/convert.py,
#   4. quantize the f16 file into every other format listed in QUANTS,
#   5. hash every model file and collect the hashes in SHA256SUMS.

# Delete a target file when its recipe fails, so a truncated download, a
# half-written model or an empty .sha file is never mistaken for a finished one.
.DELETE_ON_ERROR:

MODEL_NAME = open-llama-7b-v2
PYTHON ?= python

# llama.cpp release tag and the tarball name GitHub serves for it.
LLAMA_TAG = db4047a
LLAMA_TAR = master-$(LLAMA_TAG).tar.gz

# Hugging Face repository, revision and the files fetched from it.
HF_REPO = openlm-research/open_llama_7b_v2
HF_REF = main
HF_FILES = pytorch_model-00001-of-00002.bin \
           pytorch_model-00002-of-00002.bin \
           tokenizer.model

# SITE is target-specific: each downloaded file gets the base URL it lives under.
$(HF_FILES): SITE = https://huggingface.co/$(HF_REPO)/resolve/$(HF_REF)
$(LLAMA_TAR): SITE = https://github.com/ggerganov/llama.cpp/archive/refs/tags

FILES = $(HF_FILES) $(LLAMA_TAR)

QUANTS = f16 q4_0 q4_1 q5_0 q5_1 q8_0 \
         q2_K \
         q3_K_S q3_K_M q3_K_L \
         q4_K_S q4_K_M \
         q5_K_S q5_K_M \
         q6_K

# One output file per entry in QUANTS: $(MODEL_NAME)-<quant>.bin
MODEL_FILES = $(addsuffix .bin,$(addprefix $(MODEL_NAME)-,$(QUANTS)))

.PHONY: all
all: $(MODEL_FILES) SHA256SUMS

# Download a file from its SITE. --fail makes curl exit non-zero on an HTTP
# error instead of saving the server's error page under the target name.
$(FILES):
	curl --fail -L -o $@ --url $(SITE)/$@

# Unpack the llama.cpp sources. Extraction goes to a scratch directory that is
# renamed only after tar succeeds: make cannot delete a directory target on
# failure, so a partial tree must never appear under the final name. Replacing
# the directory also keeps a newer tarball from being unpacked over old files.
llama.cpp: $(LLAMA_TAR)
	rm -rf $@.tmp
	mkdir -p $@.tmp
	tar -xf $< --strip-components=1 -C $@.tmp
	rm -rf $@
	mv $@.tmp $@

# Build the quantize tool inside the unpacked tree. The directory is an
# order-only prerequisite: it must exist, but its timestamp (which changes
# whenever entries are added or removed in it) must not trigger a rebuild.
llama.cpp/quantize: | llama.cpp
	$(MAKE) -C llama.cpp quantize

# Convert the downloaded checkpoint (read from the current directory, ".")
# into the f16 model file.
$(MODEL_NAME)-f16.bin: $(HF_FILES) | llama.cpp
	$(PYTHON) llama.cpp/convert.py --outtype f16 --outfile $@ .

# Quantize the f16 model. The stem is the quant name without its leading "q"
# (e.g. 4_0), so q$* rebuilds the type name handed to quantize.
$(MODEL_NAME)-q%.bin: $(MODEL_NAME)-f16.bin | llama.cpp/quantize
	llama.cpp/quantize $< $@ q$*

# Per-file checksum, written next to the file as <file>.sha.
%.sha: %
	sha256sum $< > $@

# Concatenate the per-file checksums into a single manifest.
SHA256SUMS: $(addsuffix .sha,$(MODEL_FILES))
	cat $^ > $@