initial commit

Browse files

Files changed (10) hide show

.gitignore +7 -0
Makefile +42 -0
README.md +27 -0
SHA256SUMS +6 -0
llongma-3b-f16.bin +3 -0
llongma-3b-q4_0.bin +3 -0
llongma-3b-q4_1.bin +3 -0
llongma-3b-q5_0.bin +3 -0
llongma-3b-q5_1.bin +3 -0
llongma-3b-q8_0.bin +3 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,7 @@

+convert.py
+llama.cpp*/
+pytorch_model.bin
+*.sha
+*.tar.gz
+tokenizer.model
+config.json

Makefile ADDED Viewed

	@@ -0,0 +1,42 @@

+# --- Output naming and tools ---
+# Base name shared by every generated model file.
+MODEL_NAME = llongma-3b
+# Interpreter used to run convert.py; only set here when not already defined.
+PYTHON ?= python
+# --- llama.cpp source pin ---
+# The archive name and the unpacked directory name are both derived from the tag.
+LLAMA_TAG = 7487137
+LLAMA_TAR = master-$(LLAMA_TAG).tar.gz
+LLAMA_DIR = llama.cpp-master-$(LLAMA_TAG)
+# Extra variable assignments passed to the llama.cpp sub-make.
+LLAMA_FLAGS = LLAMA_NO_K_QUANTS=1
+# --- Upstream model repository ---
+HF_REPO = conceptofmind/LLongMA-3b
+HF_REF = main
+HF_FILES = pytorch_model.bin tokenizer.model config.json
+# Each downloadable file learns its base URL through a target-specific SITE.
+$(HF_FILES): SITE = https://huggingface.co/$(HF_REPO)/resolve/$(HF_REF)
+$(LLAMA_TAR): SITE = https://github.com/ggerganov/llama.cpp/archive/refs/tags
+FILES = $(HF_FILES) $(LLAMA_TAR)
+# --- Outputs: one <MODEL_NAME>-<type>.bin per entry in QUANTS ---
+QUANTS = f16 q4_0 q4_1 q5_0 q5_1 q8_0
+MODEL_FILES = $(addprefix $(MODEL_NAME)-,$(addsuffix .bin,$(QUANTS)))
+# Default goal: every model file plus the combined checksum list.
+.PHONY: all
+all: $(MODEL_FILES) SHA256SUMS
+# Remove a target whose recipe fails, so a truncated download, model file or
+# checksum is never mistaken for an up-to-date one on the next run.
+.DELETE_ON_ERROR:
+# Download any file listed in FILES from the SITE assigned to it as a
+# target-specific variable. --fail makes curl exit non-zero on an HTTP error
+# instead of saving the server's error page as the target, and writing to a
+# .part file that is renamed only after success keeps an interrupted transfer
+# from looking like a finished download.
+$(FILES):
+	curl --fail -L -o $@.part --url $(SITE)/$@
+	mv -f $@.part $@
+# Unpack the pinned llama.cpp archive. The archive is an order-only
+# prerequisite ($|): it must exist, but its timestamp never forces a re-extract.
+$(LLAMA_DIR): | $(LLAMA_TAR)
+	tar -xf $|
+# Build the quantize tool inside the unpacked tree. $(@D) is the source
+# directory and $(@F) is the "quantize" goal handed to the sub-make.
+$(LLAMA_DIR)/quantize: | $(LLAMA_DIR)
+	$(MAKE) -C $(@D) $(LLAMA_FLAGS) $(@F)
+# Convert the downloaded model files in the current directory to an f16 model
+# file. $| is the unpacked llama.cpp tree (order-only), which holds convert.py.
+$(MODEL_NAME)-f16.bin: $(HF_FILES) | $(LLAMA_DIR)
+	$(PYTHON) $|/convert.py --outtype f16 --outfile $@ .
+# Quantize the f16 file; the stem after "-q" (e.g. 4_0) selects the type.
+# $(lastword $^) is the quantize binary, listed last among the prerequisites.
+$(MODEL_NAME)-q%.bin: $(MODEL_NAME)-f16.bin $(LLAMA_DIR)/quantize
+	$(lastword $^) $< $@ q$*
+# Per-file checksum: foo.sha holds the sha256sum output for foo (the stem $*).
+%.sha: %
+	sha256sum $* > $@
+# Combined list: concatenate the per-model checksum files in MODEL_FILES order.
+SHA256SUMS: $(MODEL_FILES:%=%.sha)
+	cat $^ > $@

README.md CHANGED Viewed

@@ -1,3 +1,30 @@
 ---
 license: apache-2.0
 ---

 ---
 license: apache-2.0
 ---
+# ggml versions of LLongMA-3b
+- Announcement: [Tweet by @EnricoShippole](https://twitter.com/EnricoShippole/status/1677346578720256000)
+- Model: [conceptofmind/LLongMA-3b](https://huggingface.co/conceptofmind/LLongMA-3b) (license not specified)
+- Base Model: [openlm-research/open_llama_3b](https://huggingface.co/openlm-research/open_llama_3b), project: [OpenLLaMA: An Open Reproduction of LLaMA](https://github.com/openlm-research/open_llama) (Apache 2.0)
+- Tuning dataset: [togethercomputer/RedPajama-Data-1T](https://huggingface.co/togethercomputer/RedPajama-Data-1T) (various licenses)
+- [llama.cpp](https://github.com/ggerganov/llama.cpp): 3B model size: build 607(ffb06a3) or later, extended context: N/A
+- Context length: 8192 token extended length model.
+- Type: foundational
+## Extended context
+This model uses an extended context by [scaling the position index](https://kaiokendev.github.io/context) in the RoPE algorithm by 1/4, extending it from the 2048 tokens of the original LLaMA models to 8192 tokens.
+For the best results the model should undergo an additional finetuning training step.
+This was achieved by training on 1 billion tokens of the RedPajama-1T dataset (OpenLLaMA 3B full training was 1 trillion tokens).
+Enabling this in llama.cpp is an ongoing development effort.
+You can track it in PR [#2054](https://github.com/ggerganov/llama.cpp/pull/2054).
+Assuming the approach in that PR is correct, it should be enabled with the flags `-c 8192 --rope-freq-scale 0.25`.
+## K-quants
+There are now more quantization types in llama.cpp, some lower than 4 bits.
+Currently these are not well supported for technical reasons.
+If you want to use them, you have to build llama.cpp (from build 829 (ff5d58f)) with the `LLAMA_QKK_64` Make or CMake variable enabled (see PR [#2001](https://github.com/ggerganov/llama.cpp/pull/2001)).
+Then you can quantize the F16 or maybe Q8_0 version to what you want.

SHA256SUMS ADDED Viewed

	@@ -0,0 +1,6 @@

+563a2751c5dd1134f8d6768289af541fa7875d8b7b049f1f3393e305a0a34b17  llongma-3b-f16.bin
+9a4f502fcbc81e279c04e4d677abaa6643eafa7cd99f82e843cc2d06e4d86990  llongma-3b-q4_0.bin
+73c800127f6151fb520614e0d804744bf4f4e0f8e07fde272fdd38659030050a  llongma-3b-q4_1.bin
+fec1fd168514097c8d85d08cee3a794dd9e04c70b89509afeb51a494c9fbfb7b  llongma-3b-q5_0.bin
+ecc0e914866c0974c6a717d55803add9d80f826f9e5d6d1bf6b239c47bbf88e5  llongma-3b-q5_1.bin
+70003de74d67bedc9d85d8d198060484c4cb5d12c2e0f87f09cda0236b3e4aaf  llongma-3b-q8_0.bin

llongma-3b-f16.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:563a2751c5dd1134f8d6768289af541fa7875d8b7b049f1f3393e305a0a34b17
+size 6853758208

llongma-3b-q4_0.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9a4f502fcbc81e279c04e4d677abaa6643eafa7cd99f82e843cc2d06e4d86990
+size 1928446208

llongma-3b-q4_1.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:73c800127f6151fb520614e0d804744bf4f4e0f8e07fde272fdd38659030050a
+size 2142590208

llongma-3b-q5_0.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fec1fd168514097c8d85d08cee3a794dd9e04c70b89509afeb51a494c9fbfb7b
+size 2356734208

llongma-3b-q5_1.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ecc0e914866c0974c6a717d55803add9d80f826f9e5d6d1bf6b239c47bbf88e5
+size 2570878208

llongma-3b-q8_0.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:70003de74d67bedc9d85d8d198060484c4cb5d12c2e0f87f09cda0236b3e4aaf
+size 3641598208