chen459664
/

quantization

Model card Files Files and versions

quantization / lm-quant-toolkit /src /lm_quant_toolkit /eval /perplexity.py

chen459664's picture

Add files using upload-large-folder tool

64ddf8d verified 6 months ago

history blame contribute delete

3.33 kB

	import gc
	import time

	import numpy as np
	import torch
	from datasets import load_dataset
	from tqdm import tqdm


	def cleanup():
	    """Free unreachable Python objects, then release cached CUDA memory.

	    The garbage collector runs first on purpose: tensors that are only kept
	    alive by reference cycles hand their memory back to PyTorch's caching
	    allocator when they are collected. Emptying the cache afterwards lets
	    that memory be released as well; the reverse order would leave it
	    cached until the next call.
	    """
	    gc.collect()
	    torch.cuda.empty_cache()


	def eval_ptb(model, tokenizer, max_length=1024, stride=512, verbose=True):
	    """Score ``model`` on the Penn Treebank test split.

	    Loads the ``penn_treebank`` configuration of ``ptb_text_only`` and
	    forwards it to ``eval_ppl`` under the label ``"ptb"``, reading the text
	    from the ``sentence`` column.

	    Args:
	        model: Model handed to ``eval_ppl`` unchanged.
	        tokenizer: Tokenizer handed to ``eval_ppl`` unchanged.
	        max_length: Forwarded to ``eval_ppl``.
	        stride: Forwarded to ``eval_ppl``.
	        verbose: Forwarded to ``eval_ppl``.

	    Returns:
	        Whatever ``eval_ppl`` returns for this dataset.
	    """
	    ptb_test = load_dataset("ptb_text_only", "penn_treebank", split="test")
	    window_options = {
	        "text_column": "sentence",
	        "max_length": max_length,
	        "stride": stride,
	        "verbose": verbose,
	    }
	    return eval_ppl("ptb", model, tokenizer, ptb_test, **window_options)


	def eval_c4(model, tokenizer, max_length=1024, stride=512, verbose=True):
	    """Score ``model`` on the leading records of one C4 validation shard.

	    Loads the shard ``en/c4-validation.00000-of-00008.json.gz`` of
	    ``allenai/c4``, keeps the slice ``[:1100]`` and forwards it to
	    ``eval_ppl`` under the label ``"C4"``, reading the ``text`` column.

	    Args:
	        model: Model handed to ``eval_ppl`` unchanged.
	        tokenizer: Tokenizer handed to ``eval_ppl`` unchanged.
	        max_length: Forwarded to ``eval_ppl``.
	        stride: Forwarded to ``eval_ppl``.
	        verbose: Forwarded to ``eval_ppl``.

	    Returns:
	        Whatever ``eval_ppl`` returns for this dataset.
	    """
	    shard_files = {"validation": "en/c4-validation.00000-of-00008.json.gz"}
	    c4_validation = load_dataset(
	        "allenai/c4",
	        data_files=shard_files,
	        split="validation",
	        download_mode="reuse_dataset_if_exists",
	    )
	    # Only the first 1100 records take part in the evaluation.
	    c4_subset = c4_validation[:1100]
	    window_options = {
	        "text_column": "text",
	        "max_length": max_length,
	        "stride": stride,
	        "verbose": verbose,
	    }
	    return eval_ppl("C4", model, tokenizer, c4_subset, **window_options)


	def eval_wikitext2(model, tokenizer, max_length=1024, stride=512, verbose=True):
	    """Score ``model`` on the WikiText-2 (raw) test split.

	    Loads the ``wikitext-2-raw-v1`` configuration of ``wikitext`` and
	    forwards it to ``eval_ppl`` under the label ``"wikitext"``, reading the
	    ``text`` column.

	    Args:
	        model: Model handed to ``eval_ppl`` unchanged.
	        tokenizer: Tokenizer handed to ``eval_ppl`` unchanged.
	        max_length: Forwarded to ``eval_ppl``.
	        stride: Forwarded to ``eval_ppl``.
	        verbose: Forwarded to ``eval_ppl``.

	    Returns:
	        Whatever ``eval_ppl`` returns for this dataset.
	    """
	    wikitext_test = load_dataset("wikitext", "wikitext-2-raw-v1", split="test")
	    window_options = {
	        "text_column": "text",
	        "max_length": max_length,
	        "stride": stride,
	        "verbose": verbose,
	    }
	    return eval_ppl("wikitext", model, tokenizer, wikitext_test, **window_options)


	# Adapted from https://huggingface.co/transformers/v4.2.2/perplexity.html
	def eval_ppl(
	    ds_type,
	    model,
	    tokenizer,
	    dataset,
	    text_column="text",
	    max_length=1024,
	    stride=512,
	    verbose=True,
	):
	    """Compute sliding-window perplexity of ``model`` over ``dataset``.

	    Every entry of ``dataset[text_column]`` is joined with blank lines and
	    tokenized in one call. The token sequence is then scored in windows
	    that advance by ``stride`` tokens and span at most ``max_length``
	    tokens; in each window only the newest tokens contribute to the loss,
	    the older ones are masked with ``-100`` and serve as context.

	    Side effects: the model is switched to eval mode and the tokenizer's
	    ``pad_token``, ``padding_side`` and ``add_eos_token`` attributes are
	    overwritten.

	    Args:
	        ds_type: Label shown on the progress bar and in the summary line.
	        model: Called as ``model(input_ids, labels=...)``; the result must
	            expose a ``loss`` attribute.
	        tokenizer: Called on the joined text with ``return_tensors="pt"``.
	        dataset: Object indexable by ``text_column`` that yields the
	            strings to join.
	        text_column: Key of the text field in ``dataset``.
	        max_length: Maximum number of tokens per window.
	        stride: Number of tokens each window advances by.
	        verbose: Show the progress bar and print the summary when true.

	    Returns:
	        A ``(ppl, pred_time)`` tuple: the perplexity rounded to 4 decimals
	        and the mean duration of one forward pass in seconds rounded to 3
	        decimals.

	    Raises:
	        ValueError: If the tokenized text contains no tokens.
	    """
	    model.eval()
	    tokenizer.pad_token = tokenizer.eos_token
	    tokenizer.padding_side = "right"
	    tokenizer.add_eos_token = False

	    encodings = tokenizer("\n\n".join(dataset[text_column]), return_tensors="pt")

	    # Put the tokens where the model's weights are instead of assuming CUDA,
	    # so the evaluation also runs on CPU-only hosts. When the model exposes
	    # no usable parameter device, fall back to CUDA if it is present.
	    fallback_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	    try:
	        device = next(model.parameters()).device
	    except (AttributeError, StopIteration):
	        device = fallback_device
	    if device.type == "meta":
	        device = fallback_device

	    all_ids = encodings["input_ids"].to(device)
	    seq_len = all_ids.size(1)
	    if seq_len == 0:
	        raise ValueError(
	            f"{ds_type}: tokenized text is empty, perplexity is undefined"
	        )

	    nlls, durations = [], []
	    for i in tqdm(range(0, seq_len, stride), desc=ds_type, disable=not verbose):
	        begin_loc = max(i + stride - max_length, 0)
	        end_loc = min(i + stride, seq_len)
	        trg_len = end_loc - i  # tokens scored in this window
	        input_ids = all_ids[:, begin_loc:end_loc]
	        target_ids = input_ids.clone()
	        target_ids[:, :-trg_len] = -100  # ignore context

	        started = time.perf_counter()
	        with torch.no_grad():
	            # NOTE(review): assumes ``loss`` is a per-token mean, so scaling
	            # by trg_len recovers the window total — confirm for new models.
	            nll = model(input_ids, labels=target_ids).loss * trg_len
	        if nll.is_cuda:
	            # CUDA kernels run asynchronously; wait so the timing is real.
	            torch.cuda.synchronize()
	        durations.append(time.perf_counter() - started)
	        nlls.append(nll)

	        del input_ids, target_ids

	    # The last window always ends at seq_len, so this matches dividing by
	    # the final end_loc without relying on a loop variable after the loop.
	    ppl = np.round(float(torch.exp(torch.stack(nlls).sum() / seq_len)), 4)
	    pred_time = np.round(np.mean(durations), 3)
	    if verbose:
	        print(f"{ds_type} perplexity: {ppl}, time: {pred_time} sec")

	    del encodings, all_ids
	    cleanup()

	    return ppl, pred_time


	def eval_ppls(model, tokenizer, metric):
	    """Run the WikiText-2 and C4 evaluations and record them in ``metric``.

	    Both evaluations are run with ``verbose=True``. Four entries are
	    written into ``metric``: ``ppl_wikitext``, ``ppl_c4``,
	    ``duration_wikitext`` and ``duration_c4``.

	    Args:
	        model: Model passed to both evaluations.
	        tokenizer: Tokenizer passed to both evaluations.
	        metric: Mapping that receives the results; it is modified in place.

	    Returns:
	        The same ``metric`` object that was passed in.
	    """
	    wikitext_ppl, wikitext_secs = eval_wikitext2(model, tokenizer, verbose=True)
	    c4_ppl, c4_secs = eval_c4(model, tokenizer, verbose=True)

	    recorded = (
	        ("ppl_wikitext", wikitext_ppl),
	        ("ppl_c4", c4_ppl),
	        ("duration_wikitext", wikitext_secs),
	        ("duration_c4", c4_secs),
	    )
	    for key, value in recorded:
	        metric[key] = value
	    return metric