SlyEcho committed on
Commit
a8f706c
1 Parent(s): 6468837

Update to 1T token final version

Browse files
.gitignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ convert.py
2
+ llama.cpp/
3
+ pytorch_model.bin
4
+ *.sha
5
+ *.tar.gz
6
+ tokenizer.model
Makefile ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MODEL_NAME= open-llama-3b
2
+ PYTHON?= python
3
+ LLAMA_TAG= 5c64a09
4
+ LLAMA_TAR= master-$(LLAMA_TAG).tar.gz
5
+ HF_REPO= openlm-research/open_llama_3b
6
+ HF_REF= main
7
+ HF_FILES= pytorch_model.bin tokenizer.model
8
+ $(HF_FILES): SITE= https://huggingface.co/$(HF_REPO)/resolve/$(HF_REF)
9
+ $(LLAMA_TAR): SITE= https://github.com/ggerganov/llama.cpp/archive/refs/tags
10
+
11
+ FILES= $(HF_FILES) $(LLAMA_TAR)
12
+
13
+ QUANTS= f16 q4_0 q4_1 q5_0 q5_1 q8_0
14
+ MODEL_FILES= $(addsuffix .bin,$(addprefix $(MODEL_NAME)-,$(QUANTS)))
15
+
16
+ .PHONY: all
17
+ all: $(MODEL_FILES) SHA256SUMS
18
+
19
+ $(FILES):
20
+ curl -L -o $@ --url $(SITE)/$@
21
+
22
+ llama.cpp: $(LLAMA_TAR)
23
+ mkdir -p $@
24
+ tar -xf $< --strip-components=1 -C $@
25
+
26
+ llama.cpp/quantize: llama.cpp
27
+ $(MAKE) -C llama.cpp quantize
28
+
29
+ convert.py: convert.py.diff | llama.cpp
30
+ patch -ru llama.cpp/convert.py -i $< -o $@
31
+
32
+ $(MODEL_NAME)-f16.bin: $(HF_FILES) | convert.py
33
+ $(PYTHON) convert.py --outtype f16 --outfile $@ .
34
+
35
+ $(MODEL_NAME)-q%.bin: $(MODEL_NAME)-f16.bin | llama.cpp/quantize
36
+ llama.cpp/quantize $< $@ q$*
37
+
38
+ %.sha: %
39
+ sha256sum $< > $@
40
+
41
+ SHA256SUMS: $(addsuffix .sha,$(MODEL_FILES))
42
+ cat $^ > $@
README.md CHANGED
@@ -4,11 +4,16 @@ license: apache-2.0
4
 
5
  # ggml versions of OpenLLaMa 3B
6
 
7
- - Version: 600b token preview
8
  - Project: [OpenLLaMA: An Open Reproduction of LLaMA](https://github.com/openlm-research/open_llama)
9
- - Model: [openlm-research/open_llama_3b_600bt_preview](https://huggingface.co/openlm-research/open_llama_3b_600bt_preview)
10
  - [llama.cpp](https://github.com/ggerganov/llama.cpp): build 607(ffb06a3) or later
11
 
12
  ## Use with llama.cpp
13
 
14
  Support is now merged to master branch.
 
 
 
 
 
 
4
 
5
  # ggml versions of OpenLLaMa 3B
6
 
7
+ - Version: 1T tokens final version
8
  - Project: [OpenLLaMA: An Open Reproduction of LLaMA](https://github.com/openlm-research/open_llama)
9
+ - Model: [openlm-research/open_llama_3b](https://huggingface.co/openlm-research/open_llama_3b)
10
  - [llama.cpp](https://github.com/ggerganov/llama.cpp): build 607(ffb06a3) or later
11
 
12
  ## Use with llama.cpp
13
 
14
  Support is now merged to master branch.
15
+
16
+ ## Newer quantizations
17
+
18
+ There are now more quantization types in llama.cpp, some lower than 4 bits.
19
+ Currently these are not supported, likely because some weight tensors have shapes that are not divisible by 256.
SHA256SUMS CHANGED
@@ -1,6 +1,6 @@
1
- 4461ccd289eed0190045fa79447262401fe432b63e6d9a7919637c420814e90b open-llama-3b-f16.bin
2
- d4d4f2425f355dd57cae7c6766bbd99cf482c8b374cbf775c230f1a8c038c617 open-llama-3b-q8-0.bin
3
- 0103204cb367a4ae78a6dcc107ee95a0f0f216e6d276082a534e0dc337dd7452 open-llama-3b-q5-1.bin
4
- 7ed15048e392ce43abae56668f8df6cb0f7f1d48e4c8e924a9fc58a82510e6ac open-llama-3b-q5-0.bin
5
- 6e3b1e60f3135395bd32d8bb10388051c24b79bc5c0b5bc5e9cab11ebea253c3 open-llama-3b-q4-1.bin
6
- 878a64232542f174ecd41ca76f18b959cdf41944fb878b5cf6cb89ab264bd59b open-llama-3b-q4-0.bin
 
1
+ f123887011114da3a9b3ffb06b6fc695aaecf0a5503c38e1589379f7304f37f6 open-llama-3b-f16.bin
2
+ ec0460668c7fa50eee4d96b8687dbb29fa42b91c634151fe9c0c53cace0ab81a open-llama-3b-q4_0.bin
3
+ 15325aeae94da8886ad94ac46491cd103111e82d3be910aaaaf7c3fa0bc3c128 open-llama-3b-q4_1.bin
4
+ 87d6146b47177611f8c4529fb6186a093c512bd09050e1625819e82a1946e2da open-llama-3b-q5_0.bin
5
+ ce5de27bfccd02a34465a6d8a80ac8ad0baff186e600c29f6e3c6740f5a2dfd4 open-llama-3b-q5_1.bin
6
+ 9bbe718478161752fcc085f4c7393a6472c0b4b003c43b681208617241884d7d open-llama-3b-q8_0.bin
convert.py.diff ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ --- a/convert.py 2023-05-30 20:48:07.687486627 +0300
2
+ +++ b/convert.py 2023-05-30 20:47:55.854142065 +0300
3
+ @@ -143,12 +143,22 @@
4
+ def guessed(model: 'LazyModel', file_type: GGMLFileType) -> 'Params':
5
+ n_vocab, n_embd = model["tok_embeddings.weight"].shape
6
+
7
+ + n_mult=256
8
+ + n_head=n_embd // 128
9
+ + n_layer=next(i for i in itertools.count() if f"layers.{i}.attention.wq.weight" not in model)
10
+ +
11
+ + # TODO: hack for open_llama_3b
12
+ + if n_embd == 3200:
13
+ + n_mult = 216
14
+ + n_head = 32
15
+ + n_layer = 26
16
+ +
17
+ return Params(
18
+ n_vocab=n_vocab,
19
+ n_embd=n_embd,
20
+ - n_mult=256,
21
+ - n_head=n_embd // 128,
22
+ - n_layer=next(i for i in itertools.count() if f"layers.{i}.attention.wq.weight" not in model),
23
+ + n_mult=n_mult,
24
+ + n_head=n_head,
25
+ + n_layer=n_layer,
26
+ file_type=file_type,
27
+ )
28
+
29
+ @@ -597,7 +607,9 @@
30
+ out["norm.weight"] = model["model.norm.weight"]
31
+ out["output.weight"] = model["lm_head.weight"]
32
+
33
+ - n_head = model["model.layers.0.self_attn.q_proj.weight"].shape[1] // 128
34
+ + # TODO: hack for open_llama_3b
35
+ + n_embd = model["model.layers.0.self_attn.q_proj.weight"].shape[1]
36
+ + n_head = 32 if n_embd == 3200 else n_embd // 128
37
+ for i in itertools.count():
38
+ if f"model.layers.{i}.self_attn.q_proj.weight" not in model:
39
+ break
open-llama-3b-f16.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4461ccd289eed0190045fa79447262401fe432b63e6d9a7919637c420814e90b
3
  size 6853758208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f123887011114da3a9b3ffb06b6fc695aaecf0a5503c38e1589379f7304f37f6
3
  size 6853758208
open-llama-3b-q4_0.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:878a64232542f174ecd41ca76f18b959cdf41944fb878b5cf6cb89ab264bd59b
3
  size 1928446208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec0460668c7fa50eee4d96b8687dbb29fa42b91c634151fe9c0c53cace0ab81a
3
  size 1928446208
open-llama-3b-q4_1.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e3b1e60f3135395bd32d8bb10388051c24b79bc5c0b5bc5e9cab11ebea253c3
3
  size 2142590208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15325aeae94da8886ad94ac46491cd103111e82d3be910aaaaf7c3fa0bc3c128
3
  size 2142590208
open-llama-3b-q5_0.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ed15048e392ce43abae56668f8df6cb0f7f1d48e4c8e924a9fc58a82510e6ac
3
  size 2356734208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87d6146b47177611f8c4529fb6186a093c512bd09050e1625819e82a1946e2da
3
  size 2356734208
open-llama-3b-q5_1.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0103204cb367a4ae78a6dcc107ee95a0f0f216e6d276082a534e0dc337dd7452
3
  size 2570878208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce5de27bfccd02a34465a6d8a80ac8ad0baff186e600c29f6e3c6740f5a2dfd4
3
  size 2570878208
open-llama-3b-q8_0.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d4d4f2425f355dd57cae7c6766bbd99cf482c8b374cbf775c230f1a8c038c617
3
  size 3641598208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bbe718478161752fcc085f4c7393a6472c0b4b003c43b681208617241884d7d
3
  size 3641598208