theostos committed
Commit 875530c
1 Parent(s): 528c713
Files changed (1)
  1. app.py +18 -18
app.py CHANGED
@@ -10,15 +10,15 @@ from model.modeling_llamask import LlamaskForCausalLM
 from model.tokenizer_utils import generate_custom_mask, prepare_tokenizer
 
 
-# access_token = os.getenv("HF_TOKEN")
-# model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
-# device = 'cpu'
+access_token = os.getenv("HF_TOKEN")
+model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
+device = 'cpu'
 
-# model = LlamaskForCausalLM.from_pretrained(model_id, torch_dtype= torch.bfloat16, token=access_token)
-# model = model.to(device)
-# tokenizer = AutoTokenizer.from_pretrained(model_id, padding_side="left")
+model = LlamaskForCausalLM.from_pretrained(model_id, torch_dtype= torch.bfloat16, token=access_token)
+model = model.to(device)
+tokenizer = AutoTokenizer.from_pretrained(model_id, padding_side="left")
 
-# prepare_tokenizer(tokenizer)
+prepare_tokenizer(tokenizer)
 
 
 def respond(
@@ -27,19 +27,19 @@ def respond(
     max_tokens,
     temperature,
 ):
-    # prompt = """<|start_header_id|>system<|end_header_id|>
+    prompt = """<|start_header_id|>system<|end_header_id|>
 
-    # You are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>
-    # {message}
-    # <|eot_id|><|start_header_id|>assistant<|end_header_id|>
-    # """
-    # model_inputs = generate_custom_mask(tokenizer, [prompt], device)
+You are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>
+{message}
+<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+"""
+    model_inputs = generate_custom_mask(tokenizer, [prompt], device)
 
-    # outputs = model.generate(temperature=0.7, max_tokens=64, **model_inputs)
-    # outputs = outputs[:, model_inputs['input_ids'].shape[1]:]
-    # result = tokenizer.batch_decode(outputs, skip_special_tokens=True)
+    outputs = model.generate(temperature=0.7, max_tokens=64, **model_inputs)
+    outputs = outputs[:, model_inputs['input_ids'].shape[1]:]
+    result = tokenizer.batch_decode(outputs, skip_special_tokens=True)
 
-    return "test"
+    return result
 
 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
@@ -47,7 +47,7 @@ For information on how to customize the ChatInterface, peruse the gradio docs: h
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
-        gr.Markdown("Please enter your message. Add privacy tags (<sensitive>...</sensitive>) around the words you want to hide. Only the most recent message submitted will be taken into account (no history is retained)."),
+        gr.Markdown("Please enter your message. Add privacy tags (\<sensitive\>...\<\/sensitive\>) around the words you want to hide. Only the most recent message submitted will be taken into account (no history is retained)."),
         gr.Slider(minimum=1, maximum=128, value=32, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
     ],
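
For reference, a minimal end-to-end sketch of the flow this commit enables. It mirrors the committed app.py; the behavior of the repo-local helpers (generate_custom_mask returning a dict with input_ids plus a privacy-aware attention mask, prepare_tokenizer registering the tag tokens) is inferred from their call sites rather than documented, and the example message is invented. Note also that the committed prompt is a plain triple-quoted string, so its {message} placeholder is never actually substituted with the user's message; the sketch uses an f-string instead:

import os

import torch
from transformers import AutoTokenizer

from model.modeling_llamask import LlamaskForCausalLM
from model.tokenizer_utils import generate_custom_mask, prepare_tokenizer

# Same setup as the committed app.py; HF_TOKEN must grant access to the
# gated Llama 3.1 weights.
access_token = os.getenv("HF_TOKEN")
model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
device = 'cpu'

model = LlamaskForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, token=access_token)
model = model.to(device)
tokenizer = AutoTokenizer.from_pretrained(model_id, padding_side="left")
prepare_tokenizer(tokenizer)

# Hypothetical user message; the <sensitive>...</sensitive> tags wrap the
# span to hide, per the UI instructions above.
message = "My name is <sensitive>John Doe</sensitive> and I need a cover letter."

# Same Llama 3.1 chat template as the committed prompt, but built with an
# f-string so {message} is actually filled in.
prompt = (
    "<|start_header_id|>system<|end_header_id|>\n\n"
    "You are a helpful assistant.<|eot_id|>"
    "<|start_header_id|>user<|end_header_id|>\n"
    f"{message}\n"
    "<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n"
)

# Tokenize and build the custom (privacy-masked) model inputs.
model_inputs = generate_custom_mask(tokenizer, [prompt], device)

# Keyword names copied verbatim from app.py; stock transformers generate()
# calls this max_new_tokens, so max_tokens presumably relies on the custom
# LlamaskForCausalLM accepting it.
outputs = model.generate(temperature=0.7, max_tokens=64, **model_inputs)
outputs = outputs[:, model_inputs['input_ids'].shape[1]:]  # keep only newly generated tokens
result = tokenizer.batch_decode(outputs, skip_special_tokens=True)
print(result[0])

Two other things worth flagging in the new code: respond returns result, a list of strings, where gr.ChatInterface expects a single string reply (result[0] may be the intent), and it hardcodes temperature=0.7 and 64 tokens, so the two sliders currently have no effect on generation.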