import os
from typing import Any, Dict, List

import torch
import bitsandbytes as bnb  # NOTE(review): unused in this file — confirm whether it's needed for quantized-loading side effects before removing
from transformers import AutoTokenizer, AutoModelForCausalLM


class EndpointHandler:
    """Inference handler for a Hugging Face endpoint.

    Loads a causal-LM checkpoint once at construction time and serves
    predictions via ``__call__`` on request payloads of the form
    ``{"inputs": <str or list[str]>}``.
    """

    def __init__(self, model_path: str = "thomasabebe/coolmodel") -> None:
        """Load tokenizer and model weights from *model_path* (hub id or local dir)."""
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        self.model = AutoModelForCausalLM.from_pretrained(
            model_path, torch_dtype=torch.float32
        )
        # Fix: inference-only handler — switch off dropout / other train-mode
        # behavior. The original never called eval().
        self.model.eval()

    def __call__(self, data: Dict[str, Any]) -> Dict[str, List]:
        """Run one inference request.

        Parameters
        ----------
        data : dict
            Request payload; the text to score is read from ``data["inputs"]``
            (defaults to ``""`` when absent).

        Returns
        -------
        dict
            ``{"label": <nested list of argmax indices over the vocab>}``.

        NOTE(review): taking ``logits.argmax(-1)`` on a causal-LM output
        produces per-position token ids, not classification labels — confirm
        this is the intended contract (a sequence-classification head may have
        been meant).
        """
        inputs = data.get("inputs", "")
        encoded_input = self.tokenizer(
            inputs, return_tensors="pt", padding=True, truncation=True
        )
        # Fix: no gradients are needed at inference time; no_grad avoids
        # building an autograd graph and cuts memory/compute.
        with torch.no_grad():
            output = self.model(**encoded_input)
        # Greedy argmax over the vocabulary dimension, converted to plain
        # Python lists so the result is JSON-serializable.
        prediction = output.logits.argmax(-1).tolist()
        return {"label": prediction}