meyandrei committed
Commit 538c9ec
1 Parent(s): 4bc31f1

Delete handler.py

Files changed (1)
  1. handler.py +0 -71
handler.py DELETED
@@ -1,71 +0,0 @@
-from typing import Dict, Any
-import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM
-import re
-
-class EndpointHandler():
-    def __init__(self, path="meyandrei/bankchat"):
-        # Load the model and tokenizer
-        self.tokenizer = AutoTokenizer.from_pretrained(path, padding_side='left', use_safetensors=True)
-        self.model = AutoModelForCausalLM.from_pretrained(path, use_safetensors=True)
-        self.context_token = self.tokenizer.encode('<|context|>', return_tensors='pt')  # delimits the start of the dialogue context
-        self.endofcontext_token = self.tokenizer.encode(' <|endofcontext|>', return_tensors='pt')
-
-    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
-        """
-        data args:
-            inputs (:obj:`str`): the latest user utterance
-            context (:obj:`list`): tokenized dialogue history, as returned in 'context' below
-        Return:
-            A :obj:`dict` that will be serialized and returned
-        """
-        user_input = data.get('inputs', '')
-        history = data.get('context', [])
-
-        if history == []:
-            context_tokenized = torch.LongTensor([[]])  # empty (1, 0) tensor so torch.cat below still works
-        else:
-            history_str = self.tokenizer.decode(history[0])
-            turns = re.split(r'<\|system\|>|<\|user\|>', history_str)[1:]
-
-            for i in range(0, len(turns)-1, 2):  # re-attach the speaker tags stripped by re.split
-                turns[i] = '<|user|>' + turns[i]
-                turns[i+1] = '<|system|>' + turns[i+1]
-
-            context_tokenized = self.tokenizer.encode(''.join(turns), return_tensors='pt')
-
-        user_input_tokenized = self.tokenizer.encode(' ' + user_input, return_tensors='pt')
-        model_input = torch.cat([self.context_token, context_tokenized, user_input_tokenized, self.endofcontext_token], dim=-1)
-        attention_mask = torch.ones_like(model_input)
-
-        out_tokenized = self.model.generate(model_input, max_length=1024, eos_token_id=50258, pad_token_id=50260, attention_mask=attention_mask).tolist()[0]
-        out_str = self.tokenizer.decode(out_tokenized)
-        out_str = out_str.split('\n')[0]
-
-        generated_substring = out_str.split('<|endofcontext|>')[1]  # belief, actions, system_response (the <|...|> marker strings were stripped by the page rendering and are restored here)
-
-        beliefs_start_index = generated_substring.find('<|belief|>') + len('<|belief|>')
-        beliefs_end_index = generated_substring.find('<|endofbelief|>', beliefs_start_index)
-
-        actions_start_index = generated_substring.find('<|action|>') + len('<|action|>')
-        actions_end_index = generated_substring.find('<|endofaction|>', actions_start_index)
-
-        response_start_index = generated_substring.find('<|response|>') + len('<|response|>')
-        response_end_index = generated_substring.find('<|endofresponse|>', response_start_index)
-
-        beliefs_str = generated_substring[beliefs_start_index:beliefs_end_index]
-        actions_str = generated_substring[actions_start_index:actions_end_index]
-        system_response_str = generated_substring[response_start_index:response_end_index]
-
-        system_resp_tokenized = self.tokenizer.encode(' ' + system_response_str, return_tensors='pt')
-        history = torch.cat([torch.LongTensor(history or [[]]), user_input_tokenized, system_resp_tokenized], dim=-1).tolist()
-
-        # Prepare the output
-        model_outputs = {
-            'response': system_response_str,
-            'context': history,
-            'beliefs': beliefs_str,
-            'actions': actions_str
-        }
-
-        return model_outputs
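
For context, the deleted handler defined the endpoint's request/response contract: a JSON body with 'inputs' and 'context', answered with 'response', 'context', 'beliefs', and 'actions'. Below is a minimal client sketch of how such an endpoint would be called once deployed; the endpoint URL and token are placeholders, and the payload keys simply mirror the handler's data.get('inputs') and data.get('context') reads, so this is an illustration, not a tested client.

import requests

API_URL = "https://<endpoint-id>.endpoints.huggingface.cloud"  # placeholder endpoint URL
HEADERS = {"Authorization": "Bearer <hf_token>", "Content-Type": "application/json"}  # placeholder token

# First turn: no prior dialogue context
payload = {"inputs": "I want to open a savings account.", "context": []}
out = requests.post(API_URL, headers=HEADERS, json=payload).json()
print(out["response"])

# Follow-up turn: feed back the token-id history the handler returned
payload = {"inputs": "What documents do I need?", "context": out["context"]}
out = requests.post(API_URL, headers=HEADERS, json=payload).json()
print(out["response"], out["beliefs"], out["actions"])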