import argparse

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel, PeftConfig
| model_id = "TinyPixel/Llama-2-7B-bf16-sharded" | |
| peft_model_id = "checkpoint-3690" | |
| config = PeftConfig.from_pretrained(peft_model_id) | |

# Load the base model with 4-bit NF4 quantization so it fits on a single GPU.
bnb_config = BitsAndBytesConfig(
    load_in_8bit=False,
    load_in_4bit=True,
    llm_int8_threshold=6.0,
    llm_int8_skip_modules=None,
    llm_int8_enable_fp32_cpu_offload=False,
    llm_int8_has_fp16_weight=False,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=False,
    bnb_4bit_compute_dtype=torch.float16,
)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map={"": 0},  # place the whole model on GPU 0
)
# Attach the fine-tuned PEFT adapter on top of the quantized base model.
model = PeftModel.from_pretrained(model, peft_model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)
model.eval()
| prompt = "Below is an instruction that describes a task. Write a response that appropriately completes the request. ### Instruction: %s ### Response: " | |

def gen(x):
    q = prompt % (x,)
    inputs = tokenizer(q, return_tensors="pt", return_token_type_ids=False).to(model.device)
    with torch.no_grad():
        gened = model.generate(
            **inputs,
            max_new_tokens=128,
            do_sample=True,
        )
    # Decode only the newly generated tokens; string-replacing the prompt out of
    # the full decode is unreliable once special tokens such as <s> appear in it.
    return tokenizer.decode(gened[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Generate responses based on instructions.")
    parser.add_argument("instruction", type=str, help="The instruction for generating a response.")
    args = parser.parse_args()
    response = gen(args.instruction)
    print("Generated Response:", response)