from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
import torch


def load_model_and_tokenizer(model_name: str):
    """Load a seq2seq model and its tokenizer for inference."""
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    return model, tokenizer


def generate_answer(model, tokenizer, prompt: str, max_tokens: int = 256):
    """Generate text output from a model given a prompt."""
    # Tokenize the prompt and move the tensors to the model's device.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    # Disable gradient tracking: this is inference only.
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=max_tokens)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


def build_pipeline(model_name: str, task: str = "text2text-generation"):
    """Return a Hugging Face pipeline for inference."""
    return pipeline(task, model=model_name)
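

# --- Usage sketch (illustrative only). The checkpoint "google/flan-t5-small"
# is an assumption chosen for its small size; any seq2seq checkpoint on the
# Hugging Face Hub would work the same way with these helpers.
if __name__ == "__main__":
    model, tokenizer = load_model_and_tokenizer("google/flan-t5-small")
    answer = generate_answer(model, tokenizer, "Translate to German: Hello, world.")
    print(answer)

    # Equivalent result via the pipeline helper, which handles tokenization,
    # generation, and decoding internally.
    pipe = build_pipeline("google/flan-t5-small")
    print(pipe("Translate to German: Hello, world."))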