Update handler.py
handler.py (+14 -0)
@@ -31,6 +31,20 @@ class EndpointHandler:
 
 
     def __call__(self, data: Dict[str, Any]) -> Dict[str, str]:
+        INTRO = "A chat between a curious user and a human like artificial intelligence assistant. The assistant gives helpful, intelligent, detailed, and polite answers to the user's questions."
+        prompt = ""
+
+        # process input
+        inputs = data.pop("inputs", data)
+        parameters = data.pop("parameters", None)
+        chat_history = ' \n '.join(str(x) for x in inputs)
+        prompt = INTRO+'\n ' + chat_history
+
+        # preprocess
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        inputs = self.tokenizer(prompt+' \n <assistant>:', return_tensors="pt").to(device)
+        inputs = {k: v.to('cuda') for k, v in inputs.items()}
+
         output = self.inference_model.generate(input_ids=inputs["input_ids"], pad_token_id=self.tokenizer.pad_token_id, max_new_tokens=256, do_sample=True, temperature=0.9, top_p=0.9, repetition_penalty=1.5, early_stopping=True, length_penalty=-0.3, num_beams=5, num_return_sequences=1)
         response_raw = self.tokenizer.batch_decode(output.detach().cpu().numpy(), skip_special_tokens=True)
         response_ls = response_raw[0].split('>>')
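For context, here is a self-contained sketch of how the updated __call__ might sit inside the full handler. This is an illustration under stated assumptions, not the repo's actual code: the __init__ body and checkpoint path are placeholders (the hunk above does not show them), and the final return is hypothetical since the diff ends at the split('>>') line. One behavioral note: the committed code moves the encoded inputs to the computed device and then moves them to 'cuda' a second time; the sketch keeps only the device move, which is equivalent on a GPU endpoint and avoids crashing on a CPU-only one.

from typing import Any, Dict

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

INTRO = ("A chat between a curious user and a human like artificial intelligence "
         "assistant. The assistant gives helpful, intelligent, detailed, and polite "
         "answers to the user's questions.")


class EndpointHandler:
    def __init__(self, path: str = ""):
        # Assumed loading code -- the real __init__ is outside the shown hunk.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        self.inference_model = AutoModelForCausalLM.from_pretrained(path).to(self.device)

    def __call__(self, data: Dict[str, Any]) -> Dict[str, str]:
        # process input: "inputs" is expected to be a list of chat turns
        inputs = data.pop("inputs", data)
        parameters = data.pop("parameters", None)  # popped but unused in the shown hunk
        chat_history = ' \n '.join(str(x) for x in inputs)
        prompt = INTRO + '\n ' + chat_history

        # preprocess: encode and move tensors to whichever device is available
        # (single .to(device) call instead of the extra hardcoded .to('cuda'))
        encoded = self.tokenizer(prompt + ' \n <assistant>:', return_tensors="pt").to(self.device)

        # beam-sampled generation with the committed hyperparameters
        output = self.inference_model.generate(
            input_ids=encoded["input_ids"],
            pad_token_id=self.tokenizer.pad_token_id,
            max_new_tokens=256,
            do_sample=True,
            temperature=0.9,
            top_p=0.9,
            repetition_penalty=1.5,
            early_stopping=True,
            length_penalty=-0.3,
            num_beams=5,
            num_return_sequences=1,
        )

        # decode and split on the '>>' separator used by the model's output format
        response_raw = self.tokenizer.batch_decode(output.detach().cpu().numpy(), skip_special_tokens=True)
        response_ls = response_raw[0].split('>>')

        # Hypothetical return -- the actual return statement is outside the diff.
        return {"generated_text": response_ls[-1].strip()}

Given data.pop("inputs", data) and the join, a request body would plausibly look like {"inputs": ["<user>: Hello there"]}, with each list element one chat turn; the exact turn format is not visible in this diff.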