# open_llama_3b_ggml / Makefile
# Last commit (SlyEcho, a8f706c, verified): "Update to 1000T token final version"
# File size: 1.08 kB
# Base name shared by every generated model file: $(MODEL_NAME)-<quant>.bin.
MODEL_NAME := open-llama-3b

# Python interpreter that runs the conversion script. Only set when not
# already defined, so the environment or command line can override it.
PYTHON ?= python

# llama.cpp tag to fetch and the file name of its source tarball.
LLAMA_TAG := 5c64a09
LLAMA_TAR := master-$(LLAMA_TAG).tar.gz

# Hugging Face repository, revision and files that make up the source model.
HF_REPO := openlm-research/open_llama_3b
HF_REF := main
HF_FILES := pytorch_model.bin tokenizer.model

# Target-specific base URLs: the download rule appends the target's own file
# name to whichever SITE applies to it.
$(HF_FILES): SITE := https://huggingface.co/$(HF_REPO)/resolve/$(HF_REF)
$(LLAMA_TAR): SITE := https://github.com/ggerganov/llama.cpp/archive/refs/tags

# Every file that is fetched over the network.
FILES := $(HF_FILES) $(LLAMA_TAR)

# Output formats to produce and the model file name derived from each one.
QUANTS := f16 q4_0 q4_1 q5_0 q5_1 q8_0
MODEL_FILES := $(patsubst %,$(MODEL_NAME)-%.bin,$(QUANTS))
# Delete a target whose recipe exits with an error, so a truncated model,
# converted script or checksum file is never mistaken for an up-to-date one
# on the next run.
.DELETE_ON_ERROR:

# Default goal: build every model variant plus the combined checksum list.
.PHONY: all
all: $(MODEL_FILES) SHA256SUMS
# Download a remote file from its target-specific SITE base URL.
#   -f  makes curl exit non-zero on an HTTP error instead of saving the
#       server's error page under the target name;
#   -L  follows redirects.
# The data is written to a temporary name and renamed only after curl
# succeeded, so an aborted transfer never leaves a partial file that looks
# like a finished target.
$(FILES):
	curl -fL -o $@.part --url $(SITE)/$@
	mv -f $@.part $@
# Unpack the llama.cpp source tarball into the llama.cpp directory, removing
# the leading path component of every archive member so the sources land
# directly inside it.
llama.cpp: $(LLAMA_TAR)
	mkdir -p $@
	tar -x -f $< -C $@ --strip-components=1
# Build the quantize tool by delegating to llama.cpp's own Makefile.
# $(@D) is the directory part of the target (llama.cpp) and $(@F) its file
# part (quantize). $(MAKE) is used so flags such as -j and -n are passed on.
llama.cpp/quantize: llama.cpp
	$(MAKE) -C $(@D) $(@F)
# Produce a patched copy of llama.cpp's convert.py in the current directory.
# The upstream script is read from the unpacked sources (order-only
# prerequisite: the directory must exist, but its timestamp does not force a
# re-patch) and the result is written to $@ without modifying the original.
#
# No -r option is given on purpose: patch's -r takes a file name argument, so
# the diff-style spelling "-ru" would be read as "-r u" and send rejected
# hunks to a file called "u". Without it, rejects go to patch's default
# reject file and the diff format is auto-detected.
convert.py: convert.py.diff | llama.cpp
	patch -i $< -o $@ llama.cpp/convert.py
# Convert the downloaded Hugging Face files in the current directory (".")
# into the f16 model file. $| expands to the order-only prerequisite, i.e.
# the patched convert.py script.
$(MODEL_NAME)-f16.bin: $(HF_FILES) | convert.py
	$(PYTHON) $| --outtype f16 --outfile $@ .
# Quantize the f16 model into one of the q* formats. The pattern stem $* is
# the part of the format name after the leading "q" (e.g. 4_0), so "q$*" is
# the type argument handed to the tool. $| expands to the order-only
# prerequisite, the llama.cpp/quantize executable.
$(MODEL_NAME)-q%.bin: $(MODEL_NAME)-f16.bin | llama.cpp/quantize
	$| $< $@ q$*
# Write the SHA-256 checksum line of any file to <file>.sha.
# The shell creates the redirection target before sha256sum runs, so the
# output goes to a temporary name first and is renamed only on success.
# A missing or failing sha256sum therefore cannot leave an empty .sha file
# that looks up to date.
%.sha: %
	sha256sum $< > $@.tmp
	mv -f $@.tmp $@
# Concatenate the per-model checksum files into a single SHA256SUMS list.
SHA256SUMS: $(MODEL_FILES:%=%.sha)
	cat $^ >$@