from transformers import AutoTokenizer, AutoModelForCausalLM
import torch


class ModelHandler:
    """Wraps a Hugging Face causal LM and its tokenizer behind a simple chat method."""

    def __init__(self, model_name: str):
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        # Load weights in half precision; device_map="auto" (via Accelerate)
        # places the layers across whatever GPUs/CPU are available.
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16,
            device_map="auto",
        )

    def chat(self, message: str, max_new_tokens: int = 200) -> str:
        # Wrap the single user turn in the model's chat template and move
        # the encoded tensors to the device the model was loaded on.
        messages = [{"role": "user", "content": message}]
        inputs = self.tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt",
        ).to(self.model.device)

        outputs = self.model.generate(**inputs, max_new_tokens=max_new_tokens)
        # Decode only the newly generated tokens, skipping the prompt portion.
        return self.tokenizer.decode(
            outputs[0][inputs["input_ids"].shape[-1]:],
            skip_special_tokens=True,
        )
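

# Minimal usage sketch. The model name below is an illustrative placeholder,
# not something specified by the code above; any chat-tuned causal LM that
# ships a chat template should work the same way.
if __name__ == "__main__":
    handler = ModelHandler("Qwen/Qwen2.5-0.5B-Instruct")  # hypothetical choice
    print(handler.chat("Explain the difference between a list and a tuple."))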