test new proposed outfile naming convention changes to llama.cpp conversion script
- .args +1 -1
- .gitmodules +3 -3
- TinyLLama-v0.1-5M-F16.gguf → Tinyllama-5M-v0.2-F16.gguf +2 -2
- TinyLLama-v0.1-5M-F16.llamafile → Tinyllama-5M-v0.2-F16.llamafile +2 -2
- llama.cpp +1 -1
- llamafile +1 -1
- llamafile-creation-legacy.sh +52 -0
- llamafile-creation.sh +2 -2
- maykeye_tinyllama-metadata.json +2 -2
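
For context on what this commit is testing: the proposed convention composes the output filename from the model metadata, which is what turns `TinyLLama-v0.1-5M-F16` into `Tinyllama-5M-v0.2-F16`. A minimal sketch of the pattern the renamed files imply (the variable names are illustrative, not the conversion script's internals):

```sh
# Hypothetical sketch of the proposed <name>-<params>-<version>-<precision>
# naming convention; not the actual convert-script code.
name="Tinyllama"    # from general.name, case-normalized
params="5M"         # derived from the model's parameter count
version="v0.2"      # from general.version
precision="F16"     # from --outtype f16
echo "${name}-${params}-${version}-${precision}.gguf"
# -> Tinyllama-5M-v0.2-F16.gguf
```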
.args CHANGED

```diff
@@ -1,2 +1,2 @@
 -m
-TinyLLama-v0.1-5M-F16.gguf
+Tinyllama-5M-v0.2-F16.gguf
```
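
The `.args` file is embedded into the llamafile by `zipalign` and supplies default command-line arguments, so it has to reference the renamed GGUF. With the `-m` default baked in, the llamafile runs standalone; a quick check using the same prompt as the creation script:

```sh
# No model path needed on the command line; the embedded
# "-m Tinyllama-5M-v0.2-F16.gguf" default is applied automatically.
./Tinyllama-5M-v0.2-F16.llamafile --cli -p "hello world the gruff man said"
```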
.gitmodules CHANGED

```diff
@@ -1,9 +1,9 @@
 [submodule "maykeye_tinyllama"]
 	path = maykeye_tinyllama
 	url = https://huggingface.co/Maykeye/TinyLLama-v0
-[submodule "llama.cpp"]
-	path = llama.cpp
-	url = git@github.com:ggerganov/llama.cpp.git
 [submodule "llamafile"]
 	path = llamafile
 	url = git@github.com:Mozilla-Ocho/llamafile.git
+[submodule "llama.cpp"]
+	path = llama.cpp
+	url = git@github.com:mofosyne/llama.cpp.git
```
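
The `llama.cpp` submodule now points at the mofosyne fork, presumably where the proposed naming-convention changes live. A submodule URL change does not propagate to existing checkouts automatically; they need a re-sync:

```sh
# Pick up the new fork URL from .gitmodules, then re-fetch the submodule.
git submodule sync llama.cpp
git submodule update --init llama.cpp
```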
TinyLLama-v0.1-5M-F16.gguf → Tinyllama-5M-v0.2-F16.gguf RENAMED

```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:e0422a7e84b0d8a6ebc77513ec4fe53979850b9f6235265eba76e3e954072f86
+size 10008256
```
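
These are Git LFS pointer files, so the `oid`/`size` pair identifies the actual artifact. A quick integrity check against the new pointer (standard GNU tools, shown only as an illustration):

```sh
# Both values should match the LFS pointer above.
sha256sum Tinyllama-5M-v0.2-F16.gguf   # expect e0422a7e84b0d8a6ebc77513ec4fe53979850b9f6235265eba76e3e954072f86
stat -c %s Tinyllama-5M-v0.2-F16.gguf  # expect 10008256
```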
TinyLLama-v0.1-5M-F16.llamafile → Tinyllama-5M-v0.2-F16.llamafile RENAMED

```diff
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:0098e4ca8bbf84474fc65105d9149047d73ad964481182b954efae4ab5a9bfe9
+size 19281727
```
llama.cpp CHANGED

```diff
@@ -1 +1 @@
-Subproject commit
+Subproject commit 00ff73a90101c76108131a5867a3c3c78a42ee8c
```
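
For reference, a submodule pin like this is recorded by checking out the desired commit inside the submodule and staging the gitlink in the superproject (ordinary git workflow, not specific to this repo):

```sh
git -C llama.cpp fetch origin
git -C llama.cpp checkout 00ff73a90101c76108131a5867a3c3c78a42ee8c
git add llama.cpp   # stages the updated gitlink for the next commit
```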
llamafile CHANGED

```diff
@@ -1 +1 @@
-Subproject commit
+Subproject commit 9cd8d70942a049ba3c3bddd12e87e1fb599fbd49
```
llamafile-creation-legacy.sh ADDED

```diff
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+MODEL_DIR="maykeye_tinyllama"
+METADATA_FILE="maykeye_tinyllama-metadata.json"
+
+###############################################################################
+# Pull the model folder, llamafile (for the engine) and llama.cpp (for the conversion script)
+echo == Prep Environment ==
+git submodule update --init
+
+###############################################################################
+echo == Build and prep the llamafile engine executable ==
+pushd llamafile
+make -j8
+make
+# This is where each executable is located, for reference, as of 2024-04-05,
+# as determined by running `sudo make install PREFIX=/usr/local`
+# ./o/llamafile/zipalign               --> /usr/local/bin/zipalign
+# ./o/llama.cpp/main/main              --> /usr/local/bin/llamafile
+# ./o/llama.cpp/imatrix/imatrix        --> /usr/local/bin/llamafile-imatrix
+# ./o/llama.cpp/quantize/quantize      --> /usr/local/bin/llamafile-quantize
+# ./build/llamafile-convert            --> /usr/local/bin/llamafile-convert
+# ./o/llama.cpp/perplexity/perplexity  --> /usr/local/bin/llamafile-perplexity
+# ./o/llama.cpp/llava/llava-quantize   --> /usr/local/bin/llava-quantize
+popd
+
+###############################################################################
+echo == What is our llamafile name going to be? ==
+OUTFILE=$(./llama.cpp/examples/convert-legacy-llama.py ${MODEL_DIR} --metadata ${METADATA_FILE} --outtype f16 --get-outfile)
+echo We will be aiming to generate $OUTFILE.llamafile
+
+###############################################################################
+echo == Convert from safetensor to gguf ==
+./llama.cpp/examples/convert-legacy-llama.py ${MODEL_DIR} --metadata ${METADATA_FILE} --outtype f16
+mv ${MODEL_DIR}/${OUTFILE}.gguf ${OUTFILE}.gguf
+
+###############################################################################
+echo == Generating Llamafile ==
+cp ./llamafile/o/llama.cpp/main/main ${OUTFILE}.llamafile
+
+# Create an .args file with settings defaults
+cat >.args <<EOF
+-m
+${OUTFILE}.gguf
+EOF
+
+# zipalign the engine, gguf and default args together
+./llamafile/o/llamafile/zipalign -j0 ${OUTFILE}.llamafile ${OUTFILE}.gguf .args
+
+###############################################################################
+echo == Test Output ==
+./${OUTFILE}.llamafile --cli -p "hello world the gruff man said"
```
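
This keeps the old `convert-legacy-llama.py` flow runnable alongside the updated main script. Running it end to end would look like the following (assuming submodules are initialized and the llamafile build dependencies are present):

```sh
chmod +x llamafile-creation-legacy.sh
./llamafile-creation-legacy.sh
# Expected to leave Tinyllama-5M-v0.2-F16.gguf and
# Tinyllama-5M-v0.2-F16.llamafile in the working directory.
```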
llamafile-creation.sh CHANGED

```diff
@@ -26,12 +26,12 @@ popd
 
 ###############################################################################
 echo == What is our llamafile name going to be? ==
-OUTFILE=$(./llama.cpp/convert.py ${MODEL_DIR} --metadata ${METADATA_FILE} --outtype f16 --get-outfile)
+OUTFILE=$(./llama.cpp/convert-hf-to-gguf.py ${MODEL_DIR} --metadata ${METADATA_FILE} --outtype f16 --get-outfile)
 echo We will be aiming to generate $OUTFILE.llamafile
 
 ###############################################################################
 echo == Convert from safetensor to gguf ==
-./llama.cpp/convert.py ${MODEL_DIR} --metadata ${METADATA_FILE} --outtype f16
+./llama.cpp/convert-hf-to-gguf.py ${MODEL_DIR} --metadata ${METADATA_FILE} --outtype f16 --verbose
 mv ${MODEL_DIR}/${OUTFILE}.gguf ${OUTFILE}.gguf
 
 ###############################################################################
```
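
The main script now calls `convert-hf-to-gguf.py` instead of `convert.py`, keeping the same `--get-outfile` pattern: the flag (which looks to be part of the proposed changes in the fork, not upstream llama.cpp) prints the computed output name without converting, so the shell can capture it. A hypothetical transcript:

```sh
$ OUTFILE=$(./llama.cpp/convert-hf-to-gguf.py maykeye_tinyllama \
      --metadata maykeye_tinyllama-metadata.json --outtype f16 --get-outfile)
$ echo "$OUTFILE"
Tinyllama-5M-v0.2-F16
```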
maykeye_tinyllama-metadata.json CHANGED

```diff
@@ -1,10 +1,10 @@
 {
     "general.name": "TinyLLama",
-    "general.version": "v0.1",
+    "general.version": "v0.2",
     "general.author": "mofosyne",
     "general.url": "https://huggingface.co/mofosyne/TinyLLama-v0-llamafile",
     "general.description": "This gguf is ported from a first version of Maykeye attempt at recreating roneneldan/TinyStories-1M but using Llama architecture",
     "general.license": "apache-2.0",
     "general.source.url": "https://huggingface.co/Maykeye/TinyLLama-v0",
-    "general.source.huggingface.repository": "
+    "general.source.huggingface.repository": "Maykeye/TinyLLama-v0"
 }
```
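
The bumped `general.version` is what flows into the new filename. The fields the convention draws on can be pulled straight from the metadata file as a sanity check (jq shown purely as an illustration):

```sh
jq -r '."general.name" + " " + ."general.version"' maykeye_tinyllama-metadata.json
# TinyLLama v0.2
```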