happyme531
/

Qwen2-Audio-rkllm

Model card Files Files and versions Community

Qwen2-Audio-rkllm / rkllm-convert.py

happyme531's picture

Upload 20 files

2ef3e1d verified 9 days ago

1.22 kB

	from rkllm.api import RKLLM
	from datasets import load_dataset
	from transformers import AutoTokenizer
	from tqdm import tqdm
	import torch
	from torch import nn
	import os
	# os.environ['CUDA_VISIBLE_DEVICES']='1'

	# Path handed to the loader; '.' means the checkpoint is read from the
	# current working directory.
	modelpath = '.'
	# Alternative input: a GGUF file (pair with the load_gguf call below).
	# modelpath = "./path/to/Qwen-1.8B-F16.gguf"
	llm = RKLLM()

	# Load model
	# Use 'export CUDA_VISIBLE_DEVICES=2' to specify GPU device
	# device options: ['cpu', 'cuda']
	ret = llm.load_huggingface(model=modelpath, model_lora=None, device='cpu')
	# ret = llm.load_gguf(model=modelpath)
	if ret != 0:
		# Any non-zero return code is treated as a failure; the code is
		# propagated as the process exit status.
		print('Load model failed!')
		raise SystemExit(ret)

	# Build model
	dataset = "./data_quant.json"
	# JSON file format; note that the prompt must be included in "input", e.g.:
	# [{"input":"Human: Hello!\nAssistant: ", "target": "Hello! I am the AI assistant KK!"},...]
	# NOTE(review): `dataset` is assigned but never passed to llm.build() below,
	# so this file is not used by this script as written -- confirm whether
	# that is intentional.

	qparams = None
	# qparams = 'gdq.qparams' # Use extra_qparams
	ret = llm.build(
		do_quantization=True,
		optimization_level=1,
		quantized_dtype='w8a8',
		quantized_algorithm='normal',
		target_platform='rk3588',
		num_npu_core=3,
		extra_qparams=qparams,
	)

	if ret != 0:
		# Any non-zero return code is treated as a failure; the code is
		# propagated as the process exit status.
		print('Build model failed!')
		raise SystemExit(ret)

	# Export rkllm model to ./qwen.rkllm (relative to the working directory).
	ret = llm.export_rkllm("./qwen.rkllm")
	if ret != 0:
		# Any non-zero return code is treated as a failure; the code is
		# propagated as the process exit status.
		print('Export model failed!')
		raise SystemExit(ret)