Spaces:

ppaihack
/

zLlamaskClear

Sleeping

App Files Files Community

zLlamaskClear / app.py

theostos

add better description

478f204 about 1 year ago

raw

history blame contribute delete

2.64 kB

	import os

	import gradio as gr
	from huggingface_hub import InferenceClient

	import torch

	from transformers import AutoTokenizer
	from model.modeling_llamask import LlamaskForCausalLM
	from model.tokenizer_utils import generate_custom_mask, prepare_tokenizer


	access_token = os.getenv("HF_TOKEN")
	model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
	device = 'cuda'

	model = LlamaskForCausalLM.from_pretrained(model_id, torch_dtype= torch.bfloat16, token=access_token)
	model = model.to(device)
	model.load_adapter('theostos/zLlamask', adapter_name="zzlamask")
	tokenizer = AutoTokenizer.from_pretrained(model_id, padding_side="left")

	prepare_tokenizer(tokenizer)


	def respond(
	message,
	history: list[tuple[str, str]],
	max_tokens,
	temperature,
	):
	prompt = f"""<\|start_header_id\|>system<\|end_header_id\|>

	You are a helpful assistant.<\|eot_id\|><\|start_header_id\|>user<\|end_header_id\|>
	{message}
	<\|eot_id\|><\|start_header_id\|>assistant<\|end_header_id\|>
	"""
	model_inputs = generate_custom_mask(tokenizer, [prompt], device)

	model.disable_adapters()
	outputs = model.generate(temperature=0.7, max_tokens=32, **model_inputs)
	outputs = outputs[:, model_inputs['input_ids'].shape[1]:]
	result_no_ft = tokenizer.batch_decode(outputs, skip_special_tokens=True)

	model.enable_adapters()
	outputs = model.generate(temperature=0.7, max_tokens=32, **model_inputs)
	outputs = outputs[:, model_inputs['input_ids'].shape[1]:]
	result_ft = tokenizer.batch_decode(outputs, skip_special_tokens=True)

	return f"Without finetuning:\n{result_no_ft}\n\nWith finetuning:\n{result_ft}"

	"""
	For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
	"""
	demo = gr.ChatInterface(
	respond,
	title="zLlamask",
	description="Please enter your message. Add privacy tags ( \<sensitive\>...\<\/sensitive\>) around the words you want to hide. Only the most recent message submitted will be taken into account (no history is retained)",
	chatbot=gr.Chatbot(placeholder='Please enter your message. Add privacy tags ( \<sensitive\>...\<\/sensitive\>) around the words you want to hide. Only the most recent message submitted will be taken into account (no history is retained)\n\n\nExample: What is the \<sensitive\>capital\</sensitive\> of \<sensitive\>Tonga\</sensitive\>?'),
	additional_inputs=[
	gr.Slider(minimum=1, maximum=128, value=32, step=1, label="Max new tokens"),
	gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.1, label="Temperature"),
	]
	)


	if __name__ == "__main__":
	demo.launch()