SlyEcho committed on
Commit
a8f706c
1 Parent(s): 6468837

Update to 1T token final version

Browse files
.gitignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ convert.py
2
+ llama.cpp/
3
+ pytorch_model.bin
4
+ *.sha
5
+ *.tar.gz
6
+ tokenizer.model
Makefile ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MODEL_NAME= open-llama-3b
2
+ PYTHON?= python
3
+ LLAMA_TAG= 5c64a09
4
+ LLAMA_TAR= master-$(LLAMA_TAG).tar.gz
5
+ HF_REPO= openlm-research/open_llama_3b
6
+ HF_REF= main
7
+ HF_FILES= pytorch_model.bin tokenizer.model
8
+ $(HF_FILES): SITE= https://huggingface.co/$(HF_REPO)/resolve/$(HF_REF)
9
+ $(LLAMA_TAR): SITE= https://github.com/ggerganov/llama.cpp/archive/refs/tags
10
+
11
+ FILES= $(HF_FILES) $(LLAMA_TAR)
12
+
13
+ QUANTS= f16 q4_0 q4_1 q5_0 q5_1 q8_0
14
+ MODEL_FILES= $(addsuffix .bin,$(addprefix $(MODEL_NAME)-,$(QUANTS)))
15
+
16
+ .PHONY: all
17
+ all: $(MODEL_FILES) SHA256SUMS
18
+
19
+ $(FILES):
20
+ curl -L -o $@ --url $(SITE)/$@
21
+
22
+ llama.cpp: $(LLAMA_TAR)
23
+ mkdir -p $@
24
+ tar -xf $< --strip-components=1 -C $@
25
+
26
+ llama.cpp/quantize: llama.cpp
27
+ $(MAKE) -C llama.cpp quantize
28
+
29
+ convert.py: convert.py.diff | llama.cpp
30
+ patch -ru llama.cpp/convert.py -i $< -o $@
31
+
32
+ $(MODEL_NAME)-f16.bin: $(HF_FILES) | convert.py
33
+ $(PYTHON) convert.py --outtype f16 --outfile $@ .
34
+
35
+ $(MODEL_NAME)-q%.bin: $(MODEL_NAME)-f16.bin | llama.cpp/quantize
36
+ llama.cpp/quantize $< $@ q$*
37
+
38
+ %.sha: %
39
+ sha256sum $< > $@
40
+
41
+ SHA256SUMS: $(addsuffix .sha,$(MODEL_FILES))
42
+ cat $^ > $@
README.md CHANGED
@@ -4,11 +4,16 @@ license: apache-2.0
4
 
5
  # ggml versions of OpenLLaMa 3B
6
 
7
- - Version: 600b token preview
8
  - Project: [OpenLLaMA: An Open Reproduction of LLaMA](https://github.com/openlm-research/open_llama)
9
- - Model: [openlm-research/open_llama_3b_600bt_preview](https://huggingface.co/openlm-research/open_llama_3b_600bt_preview)
10
  - [llama.cpp](https://github.com/ggerganov/llama.cpp): build 607(ffb06a3) or later
11
 
12
  ## Use with llama.cpp
13
 
14
  Support is now merged to master branch.
 
 
 
 
 
 
4
 
5
  # ggml versions of OpenLLaMa 3B
6
 
7
+ - Version: 1T tokens final version
8
  - Project: [OpenLLaMA: An Open Reproduction of LLaMA](https://github.com/openlm-research/open_llama)
9
+ - Model: [openlm-research/open_llama_3b](https://huggingface.co/openlm-research/open_llama_3b)
10
  - [llama.cpp](https://github.com/ggerganov/llama.cpp): build 607(ffb06a3) or later
11
 
12
  ## Use with llama.cpp
13
 
14
  Support is now merged to master branch.
15
+
16
+ ## Newer quantizations
17
+
18
+ There are now more quantization types in llama.cpp, some lower than 4 bits.
19
+ Currently these are not supported, likely because some weight tensors have shapes that are not divisible by 256.
SHA256SUMS CHANGED
@@ -1,6 +1,6 @@
1
- 4461ccd289eed0190045fa79447262401fe432b63e6d9a7919637c420814e90b open-llama-3b-f16.bin
2
- d4d4f2425f355dd57cae7c6766bbd99cf482c8b374cbf775c230f1a8c038c617 open-llama-3b-q8-0.bin
3
- 0103204cb367a4ae78a6dcc107ee95a0f0f216e6d276082a534e0dc337dd7452 open-llama-3b-q5-1.bin
4
- 7ed15048e392ce43abae56668f8df6cb0f7f1d48e4c8e924a9fc58a82510e6ac open-llama-3b-q5-0.bin
5
- 6e3b1e60f3135395bd32d8bb10388051c24b79bc5c0b5bc5e9cab11ebea253c3 open-llama-3b-q4-1.bin
6
- 878a64232542f174ecd41ca76f18b959cdf41944fb878b5cf6cb89ab264bd59b open-llama-3b-q4-0.bin
 
1
+ f123887011114da3a9b3ffb06b6fc695aaecf0a5503c38e1589379f7304f37f6 open-llama-3b-f16.bin
2
+ ec0460668c7fa50eee4d96b8687dbb29fa42b91c634151fe9c0c53cace0ab81a open-llama-3b-q4_0.bin
3
+ 15325aeae94da8886ad94ac46491cd103111e82d3be910aaaaf7c3fa0bc3c128 open-llama-3b-q4_1.bin
4
+ 87d6146b47177611f8c4529fb6186a093c512bd09050e1625819e82a1946e2da open-llama-3b-q5_0.bin
5
+ ce5de27bfccd02a34465a6d8a80ac8ad0baff186e600c29f6e3c6740f5a2dfd4 open-llama-3b-q5_1.bin
6
+ 9bbe718478161752fcc085f4c7393a6472c0b4b003c43b681208617241884d7d open-llama-3b-q8_0.bin
convert.py.diff ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ --- a/convert.py 2023-05-30 20:48:07.687486627 +0300
2
+ +++ b/convert.py 2023-05-30 20:47:55.854142065 +0300
3
+ @@ -143,12 +143,22 @@
4
+ def guessed(model: 'LazyModel', file_type: GGMLFileType) -> 'Params':
5
+ n_vocab, n_embd = model["tok_embeddings.weight"].shape
6
+
7
+ + n_mult=256
8
+ + n_head=n_embd // 128
9
+ + n_layer=next(i for i in itertools.count() if f"layers.{i}.attention.wq.weight" not in model)
10
+ +
11
+ + # TODO: hack for open_llama_3b
12
+ + if n_embd == 3200:
13
+ + n_mult = 216
14
+ + n_head = 32
15
+ + n_layer = 26
16
+ +
17
+ return Params(
18
+ n_vocab=n_vocab,
19
+ n_embd=n_embd,
20
+ - n_mult=256,
21
+ - n_head=n_embd // 128,
22
+ - n_layer=next(i for i in itertools.count() if f"layers.{i}.attention.wq.weight" not in model),
23
+ + n_mult=n_mult,
24
+ + n_head=n_head,
25
+ + n_layer=n_layer,
26
+ file_type=file_type,
27
+ )
28
+
29
+ @@ -597,7 +607,9 @@
30
+ out["norm.weight"] = model["model.norm.weight"]
31
+ out["output.weight"] = model["lm_head.weight"]
32
+
33
+ - n_head = model["model.layers.0.self_attn.q_proj.weight"].shape[1] // 128
34
+ + # TODO: hack for open_llama_3b
35
+ + n_embd = model["model.layers.0.self_attn.q_proj.weight"].shape[1]
36
+ + n_head = 32 if n_embd == 3200 else n_embd // 128
37
+ for i in itertools.count():
38
+ if f"model.layers.{i}.self_attn.q_proj.weight" not in model:
39
+ break
open-llama-3b-f16.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4461ccd289eed0190045fa79447262401fe432b63e6d9a7919637c420814e90b
3
  size 6853758208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f123887011114da3a9b3ffb06b6fc695aaecf0a5503c38e1589379f7304f37f6
3
  size 6853758208
open-llama-3b-q4_0.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:878a64232542f174ecd41ca76f18b959cdf41944fb878b5cf6cb89ab264bd59b
3
  size 1928446208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec0460668c7fa50eee4d96b8687dbb29fa42b91c634151fe9c0c53cace0ab81a
3
  size 1928446208
open-llama-3b-q4_1.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e3b1e60f3135395bd32d8bb10388051c24b79bc5c0b5bc5e9cab11ebea253c3
3
  size 2142590208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15325aeae94da8886ad94ac46491cd103111e82d3be910aaaaf7c3fa0bc3c128
3
  size 2142590208
open-llama-3b-q5_0.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ed15048e392ce43abae56668f8df6cb0f7f1d48e4c8e924a9fc58a82510e6ac
3
  size 2356734208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87d6146b47177611f8c4529fb6186a093c512bd09050e1625819e82a1946e2da
3
  size 2356734208
open-llama-3b-q5_1.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0103204cb367a4ae78a6dcc107ee95a0f0f216e6d276082a534e0dc337dd7452
3
  size 2570878208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce5de27bfccd02a34465a6d8a80ac8ad0baff186e600c29f6e3c6740f5a2dfd4
3
  size 2570878208
open-llama-3b-q8_0.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d4d4f2425f355dd57cae7c6766bbd99cf482c8b374cbf775c230f1a8c038c617
3
  size 3641598208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bbe718478161752fcc085f4c7393a6472c0b4b003c43b681208617241884d7d
3
  size 3641598208