# src/llm_setup.py
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch


class HuggingFaceLLM:
    """Small wrapper around a Hugging Face causal language model.

    Loads a tokenizer and model by name and exposes a single sampling-based
    ``generate`` method that maps a prompt string to decoded text.
    """

    def __init__(self, model_name: str, device: str = 'cuda') -> None:
        """Load the tokenizer and model for *model_name*.

        Args:
            model_name: Identifier passed to ``from_pretrained`` for both the
                tokenizer and the model.
            device: Requested device string. Any CUDA device (``'cuda'``,
                ``'cuda:0'``, ...) selects half precision; anything else
                selects float32. Stored on ``self.device``.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

        # Match on the prefix so indexed devices such as 'cuda:1' also get
        # float16 instead of silently falling back to float32.
        use_half = device.startswith("cuda")
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16 if use_half else torch.float32,
            # Placement is delegated to the loader, so the weights may not
            # end up on `device`; generate() follows the model's real device.
            device_map="auto",
        )
        self.device = device

    def generate(
        self,
        prompt: str,
        max_length: int = 512,
        *,
        temperature: float = 0.7,
        top_p: float = 0.9,
    ) -> str:
        """Sample a completion for *prompt* and return the decoded text.

        Args:
            prompt: Input text to condition on.
            max_length: Forwarded as ``max_length`` to ``model.generate``.
                Per the transformers documentation this bounds the total
                sequence length (prompt tokens plus generated tokens).
            temperature: Sampling temperature (keyword-only).
            top_p: Nucleus-sampling probability mass (keyword-only).

        Returns:
            The first generated sequence decoded with special tokens
            removed. The whole sequence is decoded, so the prompt text is
            part of the returned string.
        """
        # Use the device the model was actually loaded on rather than the
        # requested one: with device_map="auto" the two can differ, and a
        # mismatch makes generation fail.
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)

        # Many causal-LM tokenizers define no pad token; fall back to EOS
        # explicitly instead of relying on an implicit per-call fallback.
        eos_id = self.tokenizer.eos_token_id
        pad_id = self.tokenizer.pad_token_id
        if pad_id is None:
            pad_id = eos_id

        outputs = self.model.generate(
            **inputs,
            max_length=max_length,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
            eos_token_id=eos_id,
            pad_token_id=pad_id,
        )
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)