import os
import gradio as gr
from huggingface_hub import InferenceClient
import torch
from transformers import AutoTokenizer
from model.modeling_llamask import LlamaskForCausalLM
from model.tokenizer_utils import generate_custom_mask, prepare_tokenizer
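
# Demo app: compare generations from the base Llama 3.1 Instruct model with
# the zLlamask adapter, while a custom attention mask hides the user's
# <sensitive>...</sensitive> spans from the model.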
access_token = os.getenv("HF_TOKEN")

model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
device = 'cuda'

model = LlamaskForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, token=access_token)
model = model.to(device)
model.load_adapter('theostos/zLlamask', adapter_name="zzlamask")

tokenizer = AutoTokenizer.from_pretrained(model_id, padding_side="left")
prepare_tokenizer(tokenizer)
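
# Answer a single message twice (adapter disabled, then enabled) so the two
# outputs can be shown side by side; history is ignored, as the UI text notes.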
def respond(
    message,
    history: list[tuple[str, str]],
    max_tokens,
    temperature,
):
    prompt = f"""<|start_header_id|>system<|end_header_id|>
You are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>
{message}
<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""
    # Build the inputs with the custom attention mask over <sensitive> spans.
    model_inputs = generate_custom_mask(tokenizer, [prompt], device)

    # First pass: base model, adapter disabled. Note that generate() expects
    # max_new_tokens (not max_tokens), and temperature only takes effect when
    # do_sample=True; the slider values are used instead of hard-coded ones.
    model.disable_adapters()
    outputs = model.generate(do_sample=True, temperature=temperature, max_new_tokens=max_tokens, **model_inputs)
    outputs = outputs[:, model_inputs['input_ids'].shape[1]:]
    # batch_decode returns a list of strings; keep the single decoded answer.
    result_no_ft = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

    # Second pass: same prompt with the zLlamask adapter enabled.
    model.enable_adapters()
    outputs = model.generate(do_sample=True, temperature=temperature, max_new_tokens=max_tokens, **model_inputs)
    outputs = outputs[:, model_inputs['input_ids'].shape[1]:]
    result_ft = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

    return f"Without finetuning:\n{result_no_ft}\n\nWith finetuning:\n{result_ft}"
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
demo = gr.ChatInterface(
    respond,
    title="zLlamask",
    description="Enter your message and wrap the words you want to hide in privacy tags (\<sensitive\>...\</sensitive\>). Only the most recent message is taken into account (no history is retained).",
    chatbot=gr.Chatbot(placeholder='Enter your message and wrap the words you want to hide in privacy tags (\<sensitive\>...\</sensitive\>). Only the most recent message is taken into account (no history is retained).\n\nExample: What is the \<sensitive\>capital\</sensitive\> of \<sensitive\>Tonga\</sensitive\>?'),
    additional_inputs=[
        gr.Slider(minimum=1, maximum=128, value=32, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.1, label="Temperature"),
    ],
)
if __name__ == "__main__":
    demo.launch()
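
# Note: this Space assumes a CUDA GPU (device = 'cuda'); to run it elsewhere,
# adjust the device before launching with `python app.py`.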