|
# Recipes below use bash-only syntax (C-style `for ((...))` loops), so force bash.
SHELL := /bin/bash

# ---------------------------------------------------------------------------
# Locations (relative paths are relative to the directory make runs in)
# ---------------------------------------------------------------------------

# Checkout of Native-LLM-for-Android, expected next to this project.
NATIVE_ANDROID       = ../Native-LLM-for-Android
# QwenVL export scripts live here.
QWEN_VL_DIR          = $(NATIVE_ANDROID)/Export_ONNX/QwenVL
# Source .onnx files read by the merge, slim and quantize targets.
ONNX_SRC_DIR         = $(QWEN_VL_DIR)/onnx
# Destination for merged / quantized models.
ONNX_DEST_DIR        = $(QWEN_VL_DIR)/onnx-dist
# Scratch directory that quantize-% fills with symlinks to the source models.
STAGING_DIR          = /tmp/transformers.js/staging
# Checkout of transformers.js (provides scripts.quantize).
TRANSFORMERS_JS_PATH = ../transformers.js
# ONNX helper scripts inside the Native-LLM-for-Android checkout
# (not referenced by the rules visible in this part of the file).
ONNX_TOOLS_PATH      = $(NATIVE_ANDROID)/ONNX_Tools

# ---------------------------------------------------------------------------
# Python interpreters (one virtualenv per checkout)
# ---------------------------------------------------------------------------

NATIVE_PYTHON        = $(NATIVE_ANDROID)/.venv/bin/python3
# Not referenced by the rules visible in this part of the file.
TRANSFORMERS_PYTHON  = $(TRANSFORMERS_JS_PATH)/.venv/bin/python3

# ---------------------------------------------------------------------------
# Model parts; `quantize` runs quantize-<part> for each entry.
# ---------------------------------------------------------------------------

PARTS                = A B C D E
|
|
|
# progress_bar: shell fragment that redraws a 20-cell progress bar on one line.
#   $(1)  number of items processed so far (shell expression)
#   $(2)  total number of items (must be non-zero)
#   $(3)  label of the item being processed
# Meant to be expanded with $(call ...) inside a recipe line that continues
# with `; \`. The line is finished with a carriage return rather than a
# newline, so the next call overwrites it in place.
define progress_bar
	printf "\r Progress: \033[1;32m["; \
	_pb_fill=$$(($1 * 20 / $2)); \
	_pb_n=0; \
	while ((_pb_n < _pb_fill)); do printf "="; _pb_n=$$((_pb_n + 1)); done; \
	printf "\033[0m"; \
	_pb_gap=$$((20 - _pb_fill)); \
	_pb_n=0; \
	while ((_pb_n < _pb_gap)); do printf " "; _pb_n=$$((_pb_n + 1)); done; \
	printf "\033[1;32m]\033[0m $1/$2 Processing: \033[1;34m%s\033[K\033[0m\r" "$3"
endef
|
|
|
|
|
|
|
# Re-export every source model with merged tensors, then normalise the result.
# The second pass reads the files the first pass wrote, so the two passes are
# run one after the other through sub-makes instead of being listed as
# prerequisites (prerequisite order is not guaranteed under `make -j`).
.PHONY: export-merged-source-models
export-merged-source-models:
	$(MAKE) export-merged-source-models-first-pass
	$(MAKE) export-merged-source-models-second-pass
	@echo "✅ Exporting merged source models complete"
|
|
|
# First pass: load every *.onnx under $(ONNX_SRC_DIR) and save it into
# $(ONNX_DEST_DIR) with all tensors merged into one "<name>.onnx.data" file.
#
# Notes on the embedded Python:
#  - Paths are handed over through sys.argv instead of being pasted into the
#    Python source, so quotes in a path cannot break the code.
#  - A "<name>.onnx.data" left by a previous run is deleted before saving.
#    onnx's external-data writer appears to append to an existing file rather
#    than truncate it, so a stale file would otherwise grow on each run.
#  - The file list is split on whitespace, so paths containing spaces are
#    not supported.
.PHONY: export-merged-source-models-first-pass
export-merged-source-models-first-pass:
	@echo "💾 First pass: Export all models with merged tensors..."
	@mkdir -p $(ONNX_DEST_DIR)
	@files=`find $(ONNX_SRC_DIR) -name "*.onnx"`; \
	total=`echo "$$files" | wc -w | tr -d ' '`; \
	echo "Files found (first pass): $$total"; \
	current=0; \
	for item in $$files; do \
		current=$$((current + 1)); \
		$(call progress_bar,$$current,$$total,$$item); \
		$(NATIVE_PYTHON) -u -c 'import onnx, os, sys; \
			src, dest_dir = sys.argv[1], sys.argv[2]; \
			dest = os.path.join(dest_dir, os.path.basename(src)); \
			data = dest + ".data"; \
			model = onnx.load(src); \
			os.path.exists(data) and os.remove(data); \
			onnx.save_model(model, dest, all_tensors_to_one_file=True, save_as_external_data=True, location=os.path.basename(data))' \
			"$$item" "$(ONNX_DEST_DIR)" || exit 1; \
	done; \
	echo "✅ Done first pass"
|
|
|
# Second pass: rewrite every *.onnx under $(ONNX_DEST_DIR) in place.
# A model whose combined size (.onnx plus .onnx.data) exceeds 2e9 bytes is
# saved with external data in "<name>.onnx.data"; a smaller one is saved as a
# single self-contained file and ends up without a .data file.
#
# Notes on the embedded Python:
#  - The model is fully loaded first, then the old .data file is removed
#    before saving. onnx's external-data writer appears to append to an
#    existing file, so saving over the old .data file would leave stale bytes
#    in front of the fresh tensors.
#  - The path is handed over through sys.argv instead of being pasted into
#    the Python source.
.PHONY: export-merged-source-models-second-pass
export-merged-source-models-second-pass:
	@echo "💾 Second pass: Converting large models to external data format..."
	@files=`find $(ONNX_DEST_DIR) -name "*.onnx"`; \
	total=`echo "$$files" | wc -w | tr -d ' '`; \
	echo "Files found (second pass): $$total"; \
	current=0; \
	for item in $$files; do \
		current=$$((current + 1)); \
		$(call progress_bar,$$current,$$total,$$item); \
		$(NATIVE_PYTHON) -c 'import onnx, os, sys; \
			src = sys.argv[1]; \
			data = src + ".data"; \
			total_size = os.path.getsize(src); \
			total_size += os.path.getsize(data) if os.path.exists(data) else 0; \
			needs_external = total_size > 2e9; \
			model = onnx.load(src); \
			os.path.exists(data) and os.remove(data); \
			onnx.save_model( \
				model, \
				src, \
				save_as_external_data=needs_external, \
				all_tensors_to_one_file=True, \
				location=(os.path.basename(src) + ".data") if needs_external else None \
			)' \
			"$$item" || exit 1; \
	done; \
	echo "✅ Done second models"
|
|
|
|
|
# Full pipeline: export, quantize, drop oversized files, slim, fix GPU buffers.
# Each stage consumes what the previous one produced, so the stages are run
# strictly in sequence through sub-makes; listing them as prerequisites would
# let `make -j` start them concurrently.
.PHONY: all-in-one
all-in-one:
	$(MAKE) export
	$(MAKE) quantize
	$(MAKE) clean-large-files
	$(MAKE) slim
	$(MAKE) fix-gpu-buffers
	@echo "✨ All done! ONNX models exported, slimmed, quantized and fixed"
|
|
|
# Export all model parts (A-D via export-abcd, E via export-e).
# Declared phony so a file or directory named `export` cannot make the target
# look up to date.
.PHONY: export
export: export-abcd export-e
	@echo "✅ Export complete"
|
|
|
# Export model parts A-D by running QwenVL_Export_ABCD.py inside $(QWEN_VL_DIR).
#
# $(NATIVE_PYTHON) is a path relative to the directory make runs in, so it
# stops pointing at the virtualenv once the recipe has changed directory.
# It is therefore made absolute first. abspath (not realpath) is used on
# purpose: it does not resolve symlinks, so the interpreter is still invoked
# through its .venv path. A value without a slash (a bare command name) is
# left alone so PATH lookup keeps working.
.PHONY: export-abcd
export-abcd:
	@echo "🚀 Exporting parts A, B, C, D..."
	cd $(QWEN_VL_DIR) && \
	$(if $(findstring /,$(NATIVE_PYTHON)),$(abspath $(NATIVE_PYTHON)),$(NATIVE_PYTHON)) QwenVL_Export_ABCD.py "Qwen/Qwen2-VL-2B-Instruct"
|
|
|
# Export model part E by running QwenVL_Export_E.py inside $(QWEN_VL_DIR).
#
# $(NATIVE_PYTHON) is a path relative to the directory make runs in, so it
# stops pointing at the virtualenv once the recipe has changed directory.
# It is therefore made absolute first. abspath (not realpath) is used on
# purpose: it does not resolve symlinks, so the interpreter is still invoked
# through its .venv path. A value without a slash (a bare command name) is
# left alone so PATH lookup keeps working.
.PHONY: export-e
export-e:
	@echo "🚀 Exporting part E..."
	cd $(QWEN_VL_DIR) && \
	$(if $(findstring /,$(NATIVE_PYTHON)),$(abspath $(NATIVE_PYTHON)),$(NATIVE_PYTHON)) QwenVL_Export_E.py "Qwen/Qwen2-VL-2B-Instruct"
|
|
|
# Run onnxslim in place on every *.onnx under $(ONNX_SRC_DIR) except
# QwenVL_E.onnx, showing a progress bar.
#
# progress_bar takes (current, total, label). It only draws the bar and does
# not run a command, so the loop and the onnxslim call have to be written
# here. The file list is split on whitespace, so paths containing spaces are
# not supported.
.PHONY: slim
slim:
	@echo "🗜️ Slimming ONNX models..."
	@files=$$(find $(ONNX_SRC_DIR) -name "*.onnx" -type f ! -name "QwenVL_E.onnx"); \
	total=$$(echo "$$files" | wc -w | tr -d ' '); \
	current=0; \
	for item in $$files; do \
		current=$$((current + 1)); \
		$(call progress_bar,$$current,$$total,$$item); \
		onnxslim --verbose "$$item" "$$item" || exit 1; \
	done
	@echo "✅ Slimming complete"
|
|
|
# Quantize every part listed in $(PARTS), one sub-make per part, stopping at
# the first failure. Parts run one at a time because quantize-% reuses the
# single $(STAGING_DIR).
# Declared phony so a file or directory named `quantize` cannot make the
# target look up to date.
.PHONY: quantize
quantize:
	@echo "⚡ Starting quantization..."
	for part in $(PARTS); do \
		$(MAKE) quantize-$$part || exit 1; \
	done
	@echo "✅ Quantization complete"
|
|
|
# quantize-<PART>: quantize one model part (the stem $* is the part letter).
#
# Steps, all executed from inside $(TRANSFORMERS_JS_PATH):
#   1. Recreate $(STAGING_DIR) as a set of symlinks to $(ONNX_SRC_DIR).
#   2. Drop staged names matching "*_*_*.onnx" / "*_*_*.onnx_data" and every
#      .onnx that does not belong to the requested part.
#   3. Run scripts.quantize in q4f16 mode, writing to $(ONNX_DEST_DIR).
#      Part A additionally passes an --op_block_list.
#
# Every step is joined with `&&` so that a failure (for example a failed `cd`
# or an empty source directory) stops the recipe before the quantizer runs
# in the wrong place or on the wrong inputs.
quantize-%:
	@echo "⚡ Quantizing part $*..."
	mkdir -p $(ONNX_DEST_DIR)
	cd $(TRANSFORMERS_JS_PATH) && \
	mkdir -p $(STAGING_DIR) && \
	rm -f $(STAGING_DIR)/* && \
	ln -sf $$(realpath $(ONNX_SRC_DIR))/* $(STAGING_DIR)/ && \
	find $(STAGING_DIR) -name "*_*_*.onnx_data" -delete && \
	find $(STAGING_DIR) -name "*_*_*.onnx" -delete && \
	find $(STAGING_DIR) -name "*.onnx" ! -name "QwenVL_$**.onnx" -delete && \
	EXTRA_FLAGS="" && \
	if [ "$*" = "A" ]; then EXTRA_FLAGS="--op_block_list Conv DynamicQuantizeLinear DequantizeLinear Resize"; fi && \
	echo "Extra Flags for part $*: $$EXTRA_FLAGS" && \
	PYTHONPATH=$(TRANSFORMERS_JS_PATH) .venv/bin/python3 -m scripts.quantize \
		--input_folder '$(STAGING_DIR)' \
		--output_folder '$(ONNX_DEST_DIR)' \
		--mode q4f16 $$EXTRA_FLAGS
|
|
|
# Delete every model under $(ONNX_DEST_DIR) whose size, together with its
# external-data file, is 2 GiB (2147483648 bytes) or more.
# The external-data file is "<model>.data" if present, otherwise
# "<model>_data" if present; only one of them is counted, but both are
# removed along with the model.
#
# Sizes are read with `wc -c`, which behaves the same on GNU and BSD/macOS;
# `stat -f %z` only works with BSD stat. The $$(( ... )) wrapper strips the
# leading blanks some wc implementations print.
.PHONY: clean-large-files
clean-large-files:
	@echo "🧹 Removing ONNX files over 2GB..."
	cd $(ONNX_DEST_DIR) && \
	for f in $$(find . -name "*.onnx" -type f); do \
		sidecar=""; \
		if [ -f "$$f.data" ]; then \
			sidecar="$$f.data"; \
		elif [ -f "$${f}_data" ]; then \
			sidecar="$${f}_data"; \
		fi; \
		total_size=$$(( $$(wc -c < "$$f") )); \
		if [ -n "$$sidecar" ]; then \
			total_size=$$(( total_size + $$(wc -c < "$$sidecar") )); \
		fi; \
		size_mb=$$(( total_size / 1048576 )); \
		if [ "$$total_size" -ge 2147483648 ]; then \
			echo "  Removing $$f (size: $$size_mb MB)..."; \
			rm -f "$$f" "$$f.data" "$${f}_data"; \
		fi; \
	done
	@echo "✅ Large file cleanup complete"
|
|
|
# Run ONNX_Tools/clamp_for_gpu_buffers.py --overwrite on every
# QwenVL_E_*.onnx under $(ONNX_DEST_DIR), from inside $(NATIVE_ANDROID).
#
# progress_bar takes (current, total, label). It only draws the bar and does
# not run a command, so the loop and the script call have to be written here.
# $(ONNX_DEST_DIR) is resolved to an absolute path before changing directory
# so the search does not depend on how the relative path resolves afterwards.
# The file list is split on whitespace, so paths containing spaces are not
# supported.
.PHONY: fix-gpu-buffers
fix-gpu-buffers:
	@echo "🔧 Fixing GPU buffers for E models..."
	@dest_dir=$$(cd $(ONNX_DEST_DIR) && pwd) || exit 1; \
	cd $(NATIVE_ANDROID) || exit 1; \
	files=$$(find "$$dest_dir" -name "QwenVL_E_*.onnx" -type f); \
	total=$$(echo "$$files" | wc -w | tr -d ' '); \
	current=0; \
	for item in $$files; do \
		current=$$((current + 1)); \
		$(call progress_bar,$$current,$$total,$$item); \
		.venv/bin/python3 ONNX_Tools/clamp_for_gpu_buffers.py --overwrite "$$item" || exit 1; \
	done
	@echo "✅ GPU buffer fixes complete"
|
|