"""Evaluate a base model and its GPTQ-quantized counterpart on the same language modeling task."""

from argparse import ArgumentParser

import datasets
import torch
from transformers import AutoTokenizer

from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
from auto_gptq.eval_tasks import LanguageModelingTask


DATASET = "/root/ld/ld_project/AutoGPTQ/examples/quantization/dataset/raw_datasets"
# Template for samples that provide an `input` field.
WITH_INPUT_TEMPLATE = "Instruction:\n{instruction}\n\nInput:\n{input}\n\nOutput:\n"
# Template for samples without one; <用户> ("user") and <AI> are MiniCPM's chat special tokens.
WITHOUT_INPUT_TEMPLATE = "<s><用户>{instruction}<AI>Output:"


def ds_refactor_fn(samples):
    """Rewrite batched instruction/input/output columns into prompt/output pairs."""
    instruction_data = samples["instruction"]
    input_data = samples["input"]
    output_data = samples["output"]

    new_samples = {"prompt": [], "output": []}
    for instruction_txt, input_txt, output_txt in zip(instruction_data, input_data, output_data):
        if input_txt:
            prompt = WITH_INPUT_TEMPLATE.format(instruction=instruction_txt, input=input_txt)
        else:
            prompt = WITHOUT_INPUT_TEMPLATE.format(instruction=instruction_txt)
        new_samples["prompt"].append(prompt)
        new_samples["output"].append(output_txt)

    return new_samples


def main():
    parser = ArgumentParser()
    parser.add_argument("--base_model_dir", type=str, default="/root/ld/ld_model_pretrained/miniCPM-bf16")
    parser.add_argument(
        "--quantized_model_dir",
        type=str,
        default="/root/ld/ld_project/AutoGPTQ/examples/quantization/minicpm_2b_4bit",
    )
    parser.add_argument(
        "--num_samples",
        type=int,
        default=200,
        help="how many samples to draw for evaluation",
    )
    parser.add_argument("--sample_max_len", type=int, default=1024, help="max tokens for each sample")
    parser.add_argument("--block_max_len", type=int, default=2048, help="max tokens for each data block")
    parser.add_argument("--use_triton", action="store_true")
    args = parser.parse_args()

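    # Load the tokenizer and the full-precision base model. from_pretrained
    # requires a quantize config, but nothing is quantized at this stage;
    # the model is evaluated as-is to establish a baseline.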
    tokenizer = AutoTokenizer.from_pretrained(args.base_model_dir)

    model = AutoGPTQForCausalLM.from_pretrained(args.base_model_dir, BaseQuantizeConfig(), trust_remote_code=True)
    model.to("cuda:0")

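    # Build the evaluation task once; it is reused below for the quantized
    # model by swapping out `task.model`.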
    task = LanguageModelingTask(
        model=model,
        tokenizer=tokenizer,
        data_name_or_path=DATASET,
        prompt_col_name="prompt",
        label_col_name="output",
        num_samples=args.num_samples,
        sample_max_len=args.sample_max_len,
        block_max_len=args.block_max_len,
        load_fn=datasets.load_dataset,
        preprocess_fn=ds_refactor_fn,
        truncate_prompt=False,
    )

print(f"eval result for base model: {task.run()}") |
|
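    # Detach the base model from the task and release its GPU memory
    # before loading the quantized model.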
    task.model = None
    model.cpu()
    del model
    torch.cuda.empty_cache()

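    # Load the quantized model and rerun the identical task for a
    # like-for-like comparison with the baseline above.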
    model = AutoGPTQForCausalLM.from_quantized(
        args.quantized_model_dir, device="cuda:0", use_triton=args.use_triton, trust_remote_code=True
    )
    task.model = model
    task.device = model.device
    print(f"eval result for quantized model: {task.run()}")


if __name__ == "__main__":
    main()