mogaio committed on
Commit
ff02655
1 Parent(s): db91f60

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +14 -0
handler.py CHANGED
@@ -31,6 +31,20 @@ class EndpointHandler:
31
 
32
 
33
  def __call__(self, data: Dict[str, Any]) -> Dict[str, str]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  output = self.inference_model.generate(input_ids=inputs["input_ids"],pad_token_id=self.tokenizer.pad_token_id, max_new_tokens=256, do_sample=True, temperature=0.9, top_p=0.9, repetition_penalty=1.5, early_stopping=True, length_penalty = -0.3, num_beams=5, num_return_sequences=1)
35
  response_raw = self.tokenizer.batch_decode(output.detach().cpu().numpy(), skip_special_tokens=True)
36
  response_ls = response_raw[0].split('>>')
 
31
 
32
 
33
  def __call__(self, data: Dict[str, Any]) -> Dict[str, str]:
34
+ INTRO = "A chat between a curious user and a human like artificial intelligence assistant. The assistant gives helpful, intelligent, detailed, and polite answers to the user's questions."
35
+ prompt = ""
36
+
37
+ # process input
38
+ inputs = data.pop("inputs", data)
39
+ parameters = data.pop("parameters", None)
40
+ chat_history = ' \n '.join(str(x) for x in inputs)
41
+ prompt = INTRO+'\n ' + chat_history
42
+
43
+ # preprocess
44
+ device = "cuda" if torch.cuda.is_available() else "cpu"
45
+ inputs = self.tokenizer(prompt+' \n <assistant>:', return_tensors="pt").to(device)
46
+ inputs = {k: v.to('cuda') for k, v in inputs.items()}
47
+
48
  output = self.inference_model.generate(input_ids=inputs["input_ids"],pad_token_id=self.tokenizer.pad_token_id, max_new_tokens=256, do_sample=True, temperature=0.9, top_p=0.9, repetition_penalty=1.5, early_stopping=True, length_penalty = -0.3, num_beams=5, num_return_sequences=1)
49
  response_raw = self.tokenizer.batch_decode(output.detach().cpu().numpy(), skip_special_tokens=True)
50
  response_ls = response_raw[0].split('>>')