|
# --- Configuration -----------------------------------------------------------

# Base name for the generated GGUF model files.
MODEL_NAME := open-llama-7b-v2

# Python interpreter used to create the virtualenv (override: make PYTHON=python3).
PYTHON ?= python

# llama.cpp release tag to download, build, and use for conversion/quantization.
# Simple expansion (:=) throughout: values are fixed at parse time, which is
# predictable and avoids re-expansion on every reference.
LLAMA_BUILD := 1132
LLAMA_TAR := b$(LLAMA_BUILD).tar.gz
LLAMA_DIR := llama.cpp-b$(LLAMA_BUILD)

# Extra arguments passed to llama.cpp's own make (e.g. LLAMA_FLAGS=LLAMA_CUBLAS=1).
LLAMA_FLAGS :=

# Hugging Face repository and git ref the original checkpoint is fetched from.
HF_REPO := openlm-research/open_llama_7b_v2
HF_REF := main

# Checkpoint shards plus tokenizer/config files needed by convert.py.
HF_FILES := pytorch_model-00001-of-00002.bin \
            pytorch_model-00002-of-00002.bin \
            tokenizer.model \
            config.json \
            tokenizer_config.json

# Per-target download base URLs, consumed by the shared curl rule below.
$(HF_FILES): SITE := https://huggingface.co/$(HF_REPO)/resolve/$(HF_REF)
$(LLAMA_TAR): SITE := https://github.com/ggerganov/llama.cpp/archive/refs/tags
|
|
|
# Every quantization format to produce (f16 doubles as the conversion base).
QUANTS= f16 q4_0 q4_1 q5_0 q5_1 q8_0 \
        q2_K \
        q3_K_S q3_K_M q3_K_L \
        q4_K_S q4_K_M \
        q5_K_S q5_K_M \
        q6_K

# Everything fetched over the network: checkpoint files + llama.cpp tarball.
FILES= $(HF_FILES) $(LLAMA_TAR)

# One GGUF output per quantization format, e.g. $(MODEL_NAME)-q4_0.gguf.
MODEL_FILES= $(QUANTS:%=$(MODEL_NAME)-%.gguf)
|
|
|
# Delete a partially written target when its recipe fails, so an interrupted
# or failed download/convert/quantize cannot be mistaken for an up-to-date file.
.DELETE_ON_ERROR:

.PHONY: all
# Default goal: build every quantized model plus the checksum manifest.
all: $(MODEL_FILES) SHA256SUMS
|
|
|
# Generic download rule; SITE is set per target by the target-specific
# variables above. --fail makes curl exit non-zero on HTTP errors (404 etc.)
# instead of silently saving an error page as the target file.
$(FILES):
	curl -L --fail -o $@ --url $(SITE)/$@
|
|
|
# Unpack the llama.cpp release. The tarball is an order-only prerequisite
# ($|): it must exist, but a fresher tarball does not force re-extraction
# once the directory is present.
$(LLAMA_DIR): | $(LLAMA_TAR)
	tar -xf $|
|
|
|
# Build the quantize tool inside the unpacked llama.cpp tree.
# $(@D) is the directory part of the target, $(@F) the file part ("quantize").
$(LLAMA_DIR)/quantize: | $(LLAMA_DIR)
	$(MAKE) -C $(@D) $(LLAMA_FLAGS) $(@F)
|
|
|
# Python virtualenv with llama.cpp's conversion dependencies installed.
# Order-only prerequisite on $(LLAMA_DIR): both pip installs read files from
# the unpacked tree, so it must exist first — without this the recipe can race
# ahead of the extraction under `make -j`.
venv: | $(LLAMA_DIR)
	$(PYTHON) -m venv venv
	venv/bin/pip install -e $(LLAMA_DIR)/gguf-py
	venv/bin/pip install -r $(LLAMA_DIR)/requirements.txt
|
|
|
# Convert the downloaded Hugging Face checkpoint (read from the current
# directory, ".") into a single f16 GGUF file using llama.cpp's convert.py
# run from the virtualenv's interpreter.
$(MODEL_NAME)-f16.gguf: $(HF_FILES) | $(LLAMA_DIR) venv
	venv/bin/python $(LLAMA_DIR)/convert.py --outtype f16 --outfile $@ .
|
|
|
# Quantize the f16 GGUF into each lower-precision format. The pattern stem $*
# is everything after the "q" (e.g. "4_K_M" for …-q4_K_M.gguf), so "q$*"
# reconstructs the quant type name passed to the quantize tool.
$(MODEL_NAME)-q%.gguf: $(MODEL_NAME)-f16.gguf | $(LLAMA_DIR)/quantize
	$(LLAMA_DIR)/quantize $< $@ q$*
|
|
|
# Per-file checksum: "<file>.sha" holds the sha256sum output line for "<file>".
%.sha: %
	sha256sum $^ > $@
|
|
|
# Aggregate manifest: concatenation of every per-model checksum file.
SHA256SUMS: $(addsuffix .sha,$(MODEL_FILES))
	cat $^ > $@
|
|