include llamafile repo as a submodule and make build process more self contained
Files changed:
- .args +0 -1
- .gitmodules +3 -0
- TinyLLama-v0-5M-F16.llamafile +2 -2
- llama.cpp +1 -1
- llamafile +1 -0
- llamafile-creation.sh +28 -12
.args CHANGED

@@ -1,3 +1,2 @@
 -m
 TinyLLama-v0-5M-F16.gguf
-...
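In llamafile's .args convention, a line containing only "..." marks where user-supplied command-line arguments are inserted among the embedded defaults; this change drops that placeholder, leaving only the default model flag. A minimal sketch of invoking the packaged result with extra runtime flags (how extra flags merge without the "..." placeholder is an assumption here, not something this commit documents):

    # Embedded defaults now reduce to: -m TinyLLama-v0-5M-F16.gguf
    # Extra flags are assumed to still be accepted at runtime:
    ./TinyLLama-v0-5M-F16.llamafile --cli -p "hello world" -n 64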
.gitmodules CHANGED

@@ -4,3 +4,6 @@
 [submodule "llama.cpp"]
 	path = llama.cpp
 	url = git@github.com:mofosyne/llama.cpp.git
+[submodule "llamafile"]
+	path = llamafile
+	url = git@github.com:Mozilla-Ocho/llamafile.git
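With both engines tracked as pinned submodules, a fresh checkout can pull in everything the build script needs using standard git commands (the clone URL below is a placeholder):

    # Fresh clone with both pinned submodules
    git clone --recurse-submodules <this-repo-url>
    # Or, in an existing checkout:
    git submodule update --init llama.cpp llamafile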
TinyLLama-v0-5M-F16.llamafile CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:7f098bd53e7e689289be7322d19304eeea6f3b305ddbcfda4a15a452a1be35d1
+size 17633572
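The committed artifact is a Git LFS pointer; the actual 17,633,572-byte binary lives in LFS storage. A quick sketch, using stock git-lfs and coreutils, for fetching it and checking it against the digest recorded above:

    git lfs pull --include "TinyLLama-v0-5M-F16.llamafile"
    sha256sum TinyLLama-v0-5M-F16.llamafile
    # expect: 7f098bd53e7e689289be7322d19304eeea6f3b305ddbcfda4a15a452a1be35d1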
llama.cpp CHANGED

@@ -1 +1 @@
-Subproject commit
+Subproject commit da064a809badd5086d61fd82accbb16ad93cde94
llamafile ADDED

@@ -0,0 +1 @@
+Subproject commit cb92b32a6dfae4dff06d7333afe51f2b7224f709
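After checkout, both pins can be confirmed with plain git; a sketch of the expected shape of the output:

    git submodule status
    # expected commit hashes (one per line, followed by the path):
    #  da064a809badd5086d61fd82accbb16ad93cde94 llama.cpp
    #  cb92b32a6dfae4dff06d7333afe51f2b7224f709 llamafile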
llamafile-creation.sh CHANGED

@@ -1,30 +1,46 @@
-#!/bin/
+#!/bin/bash
 
+###############################################################################
+echo == Prep Environment ==
 # Pull both the model folder and llama.cpp (for the conversion script)
-git submodule update --init
+#git submodule update --init
 
-
-
-
+###############################################################################
+echo == Build and prep the llamafile engine executable ==
+pushd llamafile
+make -j8
+make
+# Where each executable lands, for reference, as of 2024-04-05
+# (as determined by running `sudo make install PREFIX=/usr/local`):
+# ./o/llamafile/zipalign              --> /usr/local/bin/zipalign
+# ./o/llama.cpp/main/main             --> /usr/local/bin/llamafile
+# ./o/llama.cpp/imatrix/imatrix       --> /usr/local/bin/llamafile-imatrix
+# ./o/llama.cpp/quantize/quantize     --> /usr/local/bin/llamafile-quantize
+# ./build/llamafile-convert           --> /usr/local/bin/llamafile-convert
+# ./o/llama.cpp/perplexity/perplexity --> /usr/local/bin/llamafile-perplexity
+# ./o/llama.cpp/llava/llava-quantize  --> /usr/local/bin/llava-quantize
+popd
 
-
+###############################################################################
+echo == Convert from safetensor to gguf ==
+./llama.cpp/convert.py maykeye_tinyllama --outtype f16 --metadata maykeye_tinyllama-metadata.json
 mv maykeye_tinyllama/TinyLLama-v0-5M-F16.gguf TinyLLama-v0-5M-F16.gguf
 
-
-cp /
+echo == Generating Llamafile ==
+cp ./llamafile/o/llama.cpp/main/main TinyLLama-v0-5M-F16.llamafile
 
 # Create an .args file with settings defaults
 cat >.args <<EOF
 -m
 TinyLLama-v0-5M-F16.gguf
-...
 EOF
 
-#
+# Zip align the engine, gguf, and default args into one file
 zipalign -j0 \
 TinyLLama-v0-5M-F16.llamafile \
 TinyLLama-v0-5M-F16.gguf \
 .args
 
-
-
+###############################################################################
+echo == Test Output ==
+./TinyLLama-v0-5M-F16.llamafile --cli -p "hello world the gruff man said"
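End to end, the script now builds the llamafile toolchain from the submodule, converts the safetensors model to gguf, and packages engine, weights, and default args into a single executable. A short usage sketch for exercising the result (the unzip listing relies on a llamafile also being a valid zip archive; the -n flag is an illustrative extra):

    ./llamafile-creation.sh
    # A llamafile doubles as a zip: the embedded gguf and .args should be listed
    unzip -l TinyLLama-v0-5M-F16.llamafile
    # Query the embedded model directly
    ./TinyLLama-v0-5M-F16.llamafile --cli -p "hello world the gruff man said" -n 32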