| import argparse |
| import os |
| from pathlib import Path |
|
|
| from rkllm.api import RKLLM |
|
|
|
|
def export_rkllm(
    model_dir: Path,
    output_path: Path,
    target_platform: str,
    num_npu_core: int,
    optimization_level: int,
):
    """Load a HuggingFace model directory, build it with RKLLM and write it to disk.

    The three RKLLM calls (``load_huggingface``, ``build``, ``export_rkllm``)
    run in that order. Each returns a status code, and the first non-zero
    status aborts the export.

    Args:
        model_dir: Directory handed to ``RKLLM.load_huggingface`` as the model.
        output_path: Destination file handed to ``RKLLM.export_rkllm``. Missing
            parent directories are created right before the export call.
        target_platform: Forwarded unchanged to ``RKLLM.build``.
        num_npu_core: Forwarded unchanged to ``RKLLM.build``.
        optimization_level: Forwarded unchanged to ``RKLLM.build``.

    Raises:
        RuntimeError: If loading, building or exporting returns a non-zero
            status.
    """
    converter = RKLLM()

    # The model is loaded on the CPU and without a LoRA adapter.
    status = converter.load_huggingface(
        model=str(model_dir),
        model_lora=None,
        device="cpu",
    )
    if status != 0:
        raise RuntimeError(f"load_huggingface failed for {model_dir}, ret={status}")

    # Quantization is switched off here; the dtype/algorithm values are still
    # passed along. NOTE(review): presumably they are ignored while
    # do_quantization is False -- confirm against the RKLLM documentation.
    build_options = {
        "do_quantization": False,
        "optimization_level": optimization_level,
        "quantized_dtype": "w8a8",
        "quantized_algorithm": "normal",
        "target_platform": target_platform,
        "num_npu_core": num_npu_core,
        "extra_qparams": None,
    }
    status = converter.build(**build_options)
    if status != 0:
        raise RuntimeError(f"RKLLM build failed for {model_dir}, ret={status}")

    # Make sure the destination directory exists before writing the artifact.
    destination_dir = output_path.parent
    destination_dir.mkdir(parents=True, exist_ok=True)

    status = converter.export_rkllm(str(output_path))
    if status != 0:
        raise RuntimeError(f"export_rkllm failed for {output_path}, ret={status}")
|
|
|
|
def main():
    """Parse the command line and export the requested model to an RKLLM file.

    ``--model-dir`` and ``--output`` are required. The target platform, NPU
    core count and optimization level default to ``rk3588``, ``1`` and ``1``.
    When ``--hf-home`` is given, its resolved path is written to ``HF_HOME``
    and the ``hub`` / ``transformers`` sub-directories are written to
    ``HUGGINGFACE_HUB_CACHE`` / ``TRANSFORMERS_CACHE`` before the export runs.
    """
    cli = argparse.ArgumentParser(
        description="Export a HuggingFace-format MiniCPM model to RKLLM.",
    )
    cli.add_argument(
        "--model-dir",
        required=True,
        help="Input HuggingFace model directory.",
    )
    cli.add_argument(
        "--output",
        required=True,
        help="Output .rkllm path.",
    )
    cli.add_argument(
        "--target-platform",
        default="rk3588",
        help="RK target platform.",
    )
    cli.add_argument(
        "--num-npu-core",
        type=int,
        default=1,
        help="NPU cores for RKLLM build.",
    )
    cli.add_argument(
        "--optimization-level",
        type=int,
        default=1,
        help="RKLLM optimization level.",
    )
    cli.add_argument(
        "--hf-home",
        default=None,
        help="Optional writable Hugging Face cache root.",
    )
    opts = cli.parse_args()

    if opts.hf_home:
        # NOTE(review): these variables are set after the module-level rkllm
        # import has already executed. If that import pulls in libraries that
        # read the cache locations at import time, the override may come too
        # late -- confirm.
        cache_root = Path(opts.hf_home).resolve()
        os.environ.update(
            {
                "HF_HOME": str(cache_root),
                "HUGGINGFACE_HUB_CACHE": str(cache_root / "hub"),
                "TRANSFORMERS_CACHE": str(cache_root / "transformers"),
            }
        )

    export_rkllm(
        model_dir=Path(opts.model_dir),
        output_path=Path(opts.output),
        target_platform=opts.target_platform,
        num_npu_core=opts.num_npu_core,
        optimization_level=opts.optimization_level,
    )
    print(f"Saved: {opts.output}")
|
|
|
|
# Script entry point: run the export CLI only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|