theostos committed
Commit
67c3a59
1 Parent(s): a65b38c

double output

Files changed (1): app.py +11 -3
app.py CHANGED
@@ -16,6 +16,7 @@ device = 'cuda'
 
 model = LlamaskForCausalLM.from_pretrained(model_id, torch_dtype= torch.bfloat16, token=access_token)
 model = model.to(device)
+model.load_adapter('theostos/zLlamask', adapter_name="zzlamask")
 tokenizer = AutoTokenizer.from_pretrained(model_id, padding_side="left")
 
 prepare_tokenizer(tokenizer)
@@ -27,6 +28,7 @@ def respond(
     max_tokens,
     temperature,
 ):
+
     prompt = f"""<|start_header_id|>system<|end_header_id|>
 
 You are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>
@@ -34,12 +36,18 @@ def respond(
 <|eot_id|><|start_header_id|>assistant<|end_header_id|>
 """
     model_inputs = generate_custom_mask(tokenizer, [prompt], device)
-
+
+    model.disable_adapters()
     outputs = model.generate(temperature=0.7, max_tokens=32, **model_inputs)
     outputs = outputs[:, model_inputs['input_ids'].shape[1]:]
-    result = tokenizer.batch_decode(outputs, skip_special_tokens=True)
+    result_no_ft = tokenizer.batch_decode(outputs, skip_special_tokens=True)
+
+    model.enable_adapters()
+    outputs = model.generate(temperature=0.7, max_tokens=32, **model_inputs)
+    outputs = outputs[:, model_inputs['input_ids'].shape[1]:]
+    result_ft = tokenizer.batch_decode(outputs, skip_special_tokens=True)
 
-    return result
+    return f"Without finetuning:\n{result_no_ft}\n\nWith finetuning:\n{result_ft}"
 
 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
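
A note on the pattern this commit introduces: load_adapter, disable_adapters and enable_adapters come from the PEFT integration in transformers, which lets one set of base weights generate both with and without a fine-tuned adapter attached. Below is a minimal standalone sketch of the same double-output pattern, assuming a stock AutoModelForCausalLM in place of this repo's custom LlamaskForCausalLM and generate_custom_mask; the base model id and prompt are placeholders.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Placeholder base model; the repo instead uses its custom LlamaskForCausalLM.
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_id, padding_side="left")
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16).to("cuda")

# Attach the adapter referenced in the diff (requires the peft package).
model.load_adapter("theostos/zLlamask", adapter_name="zzlamask")

inputs = tokenizer(["Hello!"], return_tensors="pt").to("cuda")

model.disable_adapters()   # base weights only
base_out = model.generate(**inputs, max_new_tokens=32)

model.enable_adapters()    # base weights + adapter
tuned_out = model.generate(**inputs, max_new_tokens=32)

for out in (base_out, tuned_out):
    completion = out[:, inputs["input_ids"].shape[1]:]  # drop the echoed prompt
    print(tokenizer.batch_decode(completion, skip_special_tokens=True))

Note that the standard generate API expects max_new_tokens rather than the max_tokens keyword the diff passes, and temperature only takes effect alongside do_sample=True; presumably the custom Llamask class handles those arguments itself.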