File size: 2,658 Bytes
5d5e38b 7585b58 15be350 5d5e38b 5a4673f 5d5e38b 8f0017d 7585b58 5d5e38b 7585b58 15be350 aec30a9 15be350 5d5e38b ecb359f 15be350 7585b58 19c0773 d223d66 5d5e38b 15be350 7585b58 707f408 15be350 707f408 5d5e38b 15be350 7585b58 5d5e38b aec30a9 ecb359f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
#!/bin/bash
# Settings shared by every step of this script.
# MODEL_DIR:     directory handed to the converter as the model to convert;
#                the generated .gguf is later moved out of it.
# METADATA_FILE: JSON file handed to the converter through --metadata.
MODEL_DIR='maykeye_tinyllama'
METADATA_FILE='maykeye_tinyllama-metadata.json'
###############################################################################
# Fetch the git submodules this script works from: the model folder, llamafile
# (for the engine) and llama.cpp (for the conversion script).
echo == Prep Enviroment ==
# Every later step reads from these checkouts, so stop here if the fetch fails
# instead of carrying on with missing or half-initialised directories.
git submodule update --init || {
  echo "error: 'git submodule update --init' failed" >&2
  exit 1
}
###############################################################################
echo == Build and prep the llamafile engine execuable ==
# Abort if the checkout is missing: otherwise the make calls below would run in
# the caller's directory and the closing popd would fail.
pushd llamafile || {
  echo "error: cannot enter ./llamafile (is the submodule checked out?)" >&2
  exit 1
}
# Parallel build first. Its exit status is intentionally not checked: the serial
# `make` that follows has nothing left to do when the parallel build succeeded,
# and otherwise gets a second attempt at finishing it. Only that second result
# decides whether the script continues.
make -j8
make || {
  echo "error: llamafile build failed" >&2
  exit 1
}
# For reference (as of 2024-04-05): where each built executable lives in the
# build tree and where `sudo make install PREFIX=/usr/local` would install it.
# ./o/llamafile/zipalign --> /usr/local/bin/zipalign
# ./o/llama.cpp/main/main --> /usr/local/bin/llamafile
# ./o/llama.cpp/imatrix/imatrix --> /usr/local/bin/llamafile-imatrix
# ./o/llama.cpp/quantize/quantize --> /usr/local/bin/llamafile-quantize
# ./build/llamafile-convert --> /usr/local/bin/llamafile-convert
# ./o/llama.cpp/perplexity/perplexity --> /usr/local/bin/llamafile-perplexity
# ./o/llama.cpp/llava/llava-quantize --> /usr/local/bin/llava-quantize
popd || exit 1
###############################################################################
echo == What is our llamafile name going to be? ==
# Ask the converter (via --dry-run) which file it would write, without doing the
# conversion. The script relies on that path being the only thing on stdout;
# stderr is discarded so log output cannot leak into the captured value.
# The converter is run once and its output is both shown and reused.
if ! OUTFILE_PATH=$(./llama.cpp/convert_hf_to_gguf.py "${MODEL_DIR}" --metadata "${METADATA_FILE}" --outtype f16 --dry-run 2>/dev/null) \
  || [[ -z "${OUTFILE_PATH}" ]]; then
  # Without a name every later step would operate on ".gguf" / ".llamafile".
  echo "error: could not determine the output file name from the dry run;" >&2
  echo "       re-run convert_hf_to_gguf.py --dry-run without 2>/dev/null to see why" >&2
  exit 1
fi
printf '%s\n' "${OUTFILE_PATH}"
# Keep only the file name, then drop the .gguf extension to get the base name
# used for the .gguf, .dump.md and .llamafile artefacts.
OUTFILE_FILE=$(basename -- "${OUTFILE_PATH}")
OUTFILE="${OUTFILE_FILE%.gguf}"
echo "We will be aiming to generate ${OUTFILE}.llamafile"
###############################################################################
echo == Convert from safetensor to gguf ==
# Run the real conversion. Both stdout and stderr are appended to a log file so
# the console stays readable; the log is the place to look when this fails.
./llama.cpp/convert_hf_to_gguf.py "${MODEL_DIR}" --metadata "${METADATA_FILE}" --outtype f16 --verbose &>> convert_hf_to_gguf.output.txt || {
  echo "error: conversion failed; see convert_hf_to_gguf.output.txt" >&2
  exit 1
}
# The script expects the converter's result inside the model directory; bring it
# next to this script, where the remaining steps look for it.
mv -- "${MODEL_DIR}/${OUTFILE}.gguf" "${OUTFILE}.gguf" || {
  echo "error: expected ${MODEL_DIR}/${OUTFILE}.gguf was not produced" >&2
  exit 1
}
# Generate Diagnostics Dumpfile (markdown). As in the original, a failure here
# does not stop the script.
./llama.cpp/gguf-py/scripts/gguf_dump.py --markdown "${OUTFILE}.gguf" > "${OUTFILE}.dump.md"
###############################################################################
echo == Generating Llamafile ==
# Start from a copy of the freshly built engine binary; the model and default
# arguments are added to this copy below.
cp -- ./llamafile/o/llama.cpp/main/main "${OUTFILE}.llamafile" || {
  echo "error: engine binary ./llamafile/o/llama.cpp/main/main not found" >&2
  exit 1
}
# Create an .args file with the default settings, one argument per line:
# load the bundled model with -m. This overwrites any existing ./.args.
cat >.args <<EOF
-m
${OUTFILE}.gguf
EOF
# zipalign the engine, gguf and default args into one file
# (flags kept exactly as in the original invocation).
./llamafile/o/llamafile/zipalign -j0 "${OUTFILE}.llamafile" "${OUTFILE}.gguf" .args || {
  echo "error: zipalign failed while packaging ${OUTFILE}.llamafile" >&2
  exit 1
}
###############################################################################
echo "== Test Output ./${OUTFILE}.llamafile =="
# Smoke-test the packaged llamafile with a fixed prompt. Its stdout and stderr
# are appended to a log file, so report a failure on the console as well and
# pass the llamafile's own exit status on to the caller.
"./${OUTFILE}.llamafile" --cli -p "hello world the gruff man said" &>> llamafile_output_example.output.txt || {
  status=$?
  echo "error: ./${OUTFILE}.llamafile exited with status ${status}; see llamafile_output_example.output.txt" >&2
  exit "${status}"
}
|