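# Gradio demo: loads the decapoda-research/llama-7b-hf base model with the
# ohmreborn/llama-lora-7b LoRA adapter (via PEFT) on CPU, wraps requests in an
# Alpaca-style prompt, and asks the model for TOEFL-style inference questions.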
import os
import sys
import torch
import transformers
from peft import PeftModel
from transformers import GenerationConfig, LlamaForCausalLM, LlamaTokenizer
def main(
    load_8bit: bool = False,
    base_model: str = "decapoda-research/llama-7b-hf",
    lora_weights: str = "ohmreborn/llama-lora-7b",
):
    device = 'cpu'
    # Load the base LLaMA tokenizer and model, capped at 15 GiB of CPU memory
    tokenizer = LlamaTokenizer.from_pretrained(base_model)
    model = LlamaForCausalLM.from_pretrained(
        base_model,
        load_in_8bit=load_8bit,
        max_memory={"cpu": "15GiB"},
        device_map="auto",
        low_cpu_mem_usage=True,
    )
    # Attach the LoRA adapter weights on top of the base model
    model = PeftModel.from_pretrained(
        model,
        lora_weights,
        device_map={"": device},
    )
    # LLaMA special-token ids: pad=0, bos=1, eos=2
    model.config.pad_token_id = tokenizer.pad_token_id = 0
    model.config.bos_token_id = 1
    model.config.eos_token_id = 2
    model.eval()
    if torch.__version__ >= "2" and sys.platform != "win32":
        model = torch.compile(model)
    return model, tokenizer
model, tokenizer = main()
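# The model and tokenizer are created once at import time and reused for every request.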
from typing import Union
import requests
class Prompter(object):
    def __init__(self):
        # Fetch the Alpaca prompt template from the tloen/alpaca-lora repository
        url = "https://raw.githubusercontent.com/tloen/alpaca-lora/main/templates/alpaca.json"
        response = requests.request("GET", url)
        self.template = response.json()

    def generate_prompt(
        self,
        instruction: str,
        input: Union[None, str] = None,
        label: Union[None, str] = None,
    ) -> str:
        # Fill the template with the instruction, plus the optional input and label
        if input:
            res = self.template["prompt_input"].format(
                instruction=instruction, input=input
            )
        else:
            res = self.template["prompt_no_input"].format(
                instruction=instruction
            )
        if label:
            res = f"{res}{label}"
        return res

    def get_response(self, output: str) -> str:
        # Return only the text generated after the template's response_split marker
        return output.split(self.template["response_split"])[1].strip()
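# The downloaded alpaca.json is assumed to provide the three keys used above:
# "prompt_input", "prompt_no_input", and "response_split". Rough shape (abridged,
# not the verbatim template):
# {
#     "prompt_input": "... {instruction} ... {input} ... ### Response:\n",
#     "prompt_no_input": "... {instruction} ... ### Response:\n",
#     "response_split": "### Response:"
# }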
def generate(
    input=None,
    temperature=0.75,  # scales the logits before softmax; lower values make the model more confident. https://stackoverflow.com/questions/58764619/why-should-we-use-temperature-in-softmax/63471046#63471046
    top_p=0.95,  # nucleus sampling: keep only the most probable tokens whose cumulative probability exceeds 0.95, then sample from them. https://www.linkedin.com/pulse/text-generation-temperature-top-p-sampling-gpt-models-selvakumar
    top_k=50,  # keep only the 50 most probable tokens; when combined with top_p both filters apply (e.g. if the top 50 sum to 0.90, below top_p, all 50 stay candidates). https://docs.cohere.com/docs/controlling-generation-with-top-k-top-p#2-pick-from-amongst-the-top-tokens-top-k
    max_new_tokens=1024,
    instruction="Please create an inference question in the style of TOEFL reading comprehension section. Also provide an answer in the format",
    model=model,
    tokenizer=tokenizer,
):
    prompter = Prompter()
    prompt = prompter.generate_prompt(instruction, input)
    print(prompt)
    inputs = tokenizer(prompt, return_tensors="pt")
    input_ids = inputs["input_ids"]
    generation_config = GenerationConfig(
        do_sample=True,  # sampling must be enabled for temperature/top_p/top_k to take effect
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=1.2,
    )
    with torch.no_grad():
        generation_output = model.generate(
            input_ids=input_ids,
            generation_config=generation_config,
            return_dict_in_generate=True,
            output_scores=True,
            max_new_tokens=max_new_tokens,
        )
    s = generation_output.sequences[0]
    output = tokenizer.decode(s)
    return prompter.get_response(output)
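# Hypothetical direct call (bypassing the Gradio UI), for quick local testing:
# print(generate(input="Education is the process of facilitating learning ...",
#                temperature=0.75, top_p=0.95, top_k=50))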
import gradio as gr
example = """Education is the process of facilitating learning, or the acquisition of knowledge, skills, values, morals, beliefs, habits,
and personal development. There are many types of potential educational aims and objectives,
irrespective of the specific subject being learned. Some can cross multiple school disciplines.
"""
demo = gr.Interface(
    fn=generate,
    inputs=[
        gr.Textbox(value=example, label='inputs'),
        gr.Slider(0, 1, value=0.75, step=0.05, label='temperature'),
        gr.Slider(0, 1, value=0.95, step=0.05, label='top_p'),
        gr.Slider(0, 100, value=50, step=10, label='top_k'),
    ],
    outputs=["text"],
)
demo.launch(inline=False)