import os
import gradio as gr
from huggingface_hub import InferenceClient
import torch
from transformers import AutoTokenizer
from model.modeling_llamask import LlamaskForCausalLM
from model.tokenizer_utils import generate_custom_mask, prepare_tokenizer
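
# Demo app: compare generations from the base Llama 3.1 Instruct model with
# the zLlamask adapter, while a custom attention mask hides the user's
# <sensitive>...</sensitive> spans from the model.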
access_token = os.getenv("HF_TOKEN")

model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
device = 'cuda'

model = LlamaskForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, token=access_token)
model = model.to(device)
model.load_adapter('theostos/zLlamask', adapter_name="zzlamask")

tokenizer = AutoTokenizer.from_pretrained(model_id, padding_side="left")
prepare_tokenizer(tokenizer)
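
# Answer a single message twice (adapter disabled, then enabled) so the two
# outputs can be shown side by side; history is ignored, as the UI text notes.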
def respond(
    message,
    history: list[tuple[str, str]],
    max_tokens,
    temperature,
):
    prompt = f"""<|start_header_id|>system<|end_header_id|>
You are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>
{message}
<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""
    # Build the inputs with the custom attention mask over <sensitive> spans.
    model_inputs = generate_custom_mask(tokenizer, [prompt], device)

    # First pass: base model, adapter disabled. Note that generate() expects
    # max_new_tokens (not max_tokens), and temperature only takes effect when
    # do_sample=True; the slider values are used instead of hard-coded ones.
    model.disable_adapters()
    outputs = model.generate(do_sample=True, temperature=temperature, max_new_tokens=max_tokens, **model_inputs)
    outputs = outputs[:, model_inputs['input_ids'].shape[1]:]
    # batch_decode returns a list of strings; keep the single decoded answer.
    result_no_ft = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

    # Second pass: same prompt with the zLlamask adapter enabled.
    model.enable_adapters()
    outputs = model.generate(do_sample=True, temperature=temperature, max_new_tokens=max_tokens, **model_inputs)
    outputs = outputs[:, model_inputs['input_ids'].shape[1]:]
    result_ft = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

    return f"Without finetuning:\n{result_no_ft}\n\nWith finetuning:\n{result_ft}"
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
demo = gr.ChatInterface(
    respond,
    title="zLlamask",
    description="Enter your message and wrap the words you want to hide in privacy tags (\<sensitive\>...\</sensitive\>). Only the most recent message is taken into account (no history is retained).",
    chatbot=gr.Chatbot(placeholder='Enter your message and wrap the words you want to hide in privacy tags (\<sensitive\>...\</sensitive\>). Only the most recent message is taken into account (no history is retained).\n\nExample: What is the \<sensitive\>capital\</sensitive\> of \<sensitive\>Tonga\</sensitive\>?'),
    additional_inputs=[
        gr.Slider(minimum=1, maximum=128, value=32, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.1, label="Temperature"),
    ],
)
if __name__ == "__main__":
    demo.launch()
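
# Note: this Space assumes a CUDA GPU (device = 'cuda'); to run it elsewhere,
# adjust the device before launching with `python app.py`.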