theostos committed on
Commit
4289215
1 Parent(s): 6c3fbea

add llamask

Browse files
app.py CHANGED
@@ -1,21 +1,41 @@
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
 
4
- """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
- """
7
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
 
 
 
 
 
 
 
 
 
8
 
9
 
10
  def respond(
11
  message,
12
  history: list[tuple[str, str]],
13
- system_message,
14
  max_tokens,
15
  temperature,
16
- top_p,
17
  ):
18
- return "test", []
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
  """
21
  For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
 
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
 
4
import torch
from transformers import AutoTokenizer

# app.py is executed as a top-level script, so a relative import
# (`from .models...`) fails with "attempted relative import with no known
# parent package". The helper modules live in the local `model/` directory,
# so they are imported absolutely from there.
# NOTE(review): `tokenizer_utils` was previously imported from
# `masked_llm.models`, a package that is not listed in requirements.txt;
# confirm the local copy in `model/` is the intended one.
from model.modeling_llamask import LlamaskForCausalLM
from model.tokenizer_utils import generate_custom_mask, prepare_tokenizer

# Checkpoint to load and the device all inference runs on.
model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
device = "cpu"

# Load the weights in bfloat16, then move the model to the target device.
model = LlamaskForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)
model = model.to(device)

# Left padding so that, for a padded batch, generated tokens directly follow
# the end of every prompt.
tokenizer = AutoTokenizer.from_pretrained(model_id, padding_side="left")

# Project-specific tokenizer adjustments (see model/tokenizer_utils.py).
prepare_tokenizer(tokenizer)
18
 
19
 
20
  def respond(
21
  message,
22
  history: list[tuple[str, str]],
 
23
  max_tokens,
24
  temperature,
 
25
  ):
26
+ prompt = """<|start_header_id|>system<|end_header_id|>
27
+
28
+ You are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>
29
+ {message}
30
+ <|eot_id|><|start_header_id|>assistant<|end_header_id|>
31
+ """
32
+ model_inputs = generate_custom_mask(tokenizer, [prompt], device)
33
+
34
+ outputs = model.generate(temperature=0.7, max_tokens=64, **model_inputs)
35
+ outputs = outputs[:, model_inputs['input_ids'].shape[1]:]
36
+ result = tokenizer.batch_decode(outputs, skip_special_tokens=True)
37
+
38
+ return result, []
39
 
40
  """
41
  For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
{models → model}/modeling_llamask.py RENAMED
File without changes
{models → model}/tokenizer_utils.py RENAMED
File without changes
requirements.txt CHANGED
@@ -1 +1,4 @@
1
- huggingface_hub==0.22.2
 
 
 
 
1
+ huggingface_hub==0.22.2
2
+ pyyaml
3
+ transformers
4
+ torch