include llamafile repo as a submodule and make build process more self contained
Files changed:
- .args +0 -1
- .gitmodules +3 -0
- TinyLLama-v0-5M-F16.llamafile +2 -2
- llama.cpp +1 -1
- llamafile +1 -0
- llamafile-creation.sh +28 -12
.args CHANGED

@@ -1,3 +1,2 @@
 -m
 TinyLLama-v0-5M-F16.gguf
-...
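In llamafile's .args convention, a line containing only "..." marks where user-supplied command-line arguments are inserted among the embedded defaults; this change drops that placeholder, leaving only the default model flag. A minimal sketch of invoking the packaged result with extra runtime flags (how extra flags merge without the "..." placeholder is an assumption here, not something this commit documents):

    # Embedded defaults now reduce to: -m TinyLLama-v0-5M-F16.gguf
    # Extra flags are assumed to still be accepted at runtime:
    ./TinyLLama-v0-5M-F16.llamafile --cli -p "hello world" -n 64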
.gitmodules CHANGED

@@ -4,3 +4,6 @@
 [submodule "llama.cpp"]
 	path = llama.cpp
 	url = git@github.com:mofosyne/llama.cpp.git
+[submodule "llamafile"]
+	path = llamafile
+	url = git@github.com:Mozilla-Ocho/llamafile.git
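With both engines tracked as pinned submodules, a fresh checkout can pull in everything the build script needs using standard git commands (the clone URL below is a placeholder):

    # Fresh clone with both pinned submodules
    git clone --recurse-submodules <this-repo-url>
    # Or, in an existing checkout:
    git submodule update --init llama.cpp llamafile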
TinyLLama-v0-5M-F16.llamafile CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:7f098bd53e7e689289be7322d19304eeea6f3b305ddbcfda4a15a452a1be35d1
+size 17633572
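The committed artifact is a Git LFS pointer; the actual 17,633,572-byte binary lives in LFS storage. A quick sketch, using stock git-lfs and coreutils, for fetching it and checking it against the digest recorded above:

    git lfs pull --include "TinyLLama-v0-5M-F16.llamafile"
    sha256sum TinyLLama-v0-5M-F16.llamafile
    # expect: 7f098bd53e7e689289be7322d19304eeea6f3b305ddbcfda4a15a452a1be35d1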
llama.cpp CHANGED

@@ -1 +1 @@
-Subproject commit
+Subproject commit da064a809badd5086d61fd82accbb16ad93cde94
llamafile ADDED

@@ -0,0 +1 @@
+Subproject commit cb92b32a6dfae4dff06d7333afe51f2b7224f709
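After checkout, both pins can be confirmed with plain git; a sketch of the expected shape of the output:

    git submodule status
    # expected commit hashes (one per line, followed by the path):
    #  da064a809badd5086d61fd82accbb16ad93cde94 llama.cpp
    #  cb92b32a6dfae4dff06d7333afe51f2b7224f709 llamafile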
llamafile-creation.sh CHANGED

@@ -1,30 +1,46 @@
-#!/bin/
+#!/bin/bash
 
+###############################################################################
+echo == Prep Environment ==
 # Pull both the model folder and llama.cpp (for the conversion script)
-git submodule update --init
+#git submodule update --init
 
-
-
-
+###############################################################################
+echo == Build and prep the llamafile engine executable ==
+pushd llamafile
+make -j8
+make
+# Where each executable lands, for reference, as of 2024-04-05
+# (as determined by running `sudo make install PREFIX=/usr/local`):
+# ./o/llamafile/zipalign              --> /usr/local/bin/zipalign
+# ./o/llama.cpp/main/main             --> /usr/local/bin/llamafile
+# ./o/llama.cpp/imatrix/imatrix       --> /usr/local/bin/llamafile-imatrix
+# ./o/llama.cpp/quantize/quantize     --> /usr/local/bin/llamafile-quantize
+# ./build/llamafile-convert           --> /usr/local/bin/llamafile-convert
+# ./o/llama.cpp/perplexity/perplexity --> /usr/local/bin/llamafile-perplexity
+# ./o/llama.cpp/llava/llava-quantize  --> /usr/local/bin/llava-quantize
+popd
 
-
+###############################################################################
+echo == Convert from safetensor to gguf ==
+./llama.cpp/convert.py maykeye_tinyllama --outtype f16 --metadata maykeye_tinyllama-metadata.json
 mv maykeye_tinyllama/TinyLLama-v0-5M-F16.gguf TinyLLama-v0-5M-F16.gguf
 
-
-cp /
+echo == Generating Llamafile ==
+cp ./llamafile/o/llama.cpp/main/main TinyLLama-v0-5M-F16.llamafile
 
 # Create an .args file with settings defaults
 cat >.args <<EOF
 -m
 TinyLLama-v0-5M-F16.gguf
-...
 EOF
 
-#
+# Zip align the engine, gguf, and default args into one file
 zipalign -j0 \
 TinyLLama-v0-5M-F16.llamafile \
 TinyLLama-v0-5M-F16.gguf \
 .args
 
-
-
+###############################################################################
+echo == Test Output ==
+./TinyLLama-v0-5M-F16.llamafile --cli -p "hello world the gruff man said"
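End to end, the script now builds the llamafile toolchain from the submodule, converts the safetensors model to gguf, and packages engine, weights, and default args into a single executable. A short usage sketch for exercising the result (the unzip listing relies on a llamafile also being a valid zip archive; the -n flag is an illustrative extra):

    ./llamafile-creation.sh
    # A llamafile doubles as a zip: the embedded gguf and .args should be listed
    unzip -l TinyLLama-v0-5M-F16.llamafile
    # Query the embedded model directly
    ./TinyLLama-v0-5M-F16.llamafile --cli -p "hello world the gruff man said" -n 32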