"""Sample a few replies from a fine-tuned causal LM checkpoint.

MODEL_DIR and PROMPT can be overridden via environment variables.
"""

import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_DIR = os.environ.get("MODEL_DIR", "rizz_model")
PROMPT = os.environ.get("PROMPT", "User: hey, how's your day going?\nAssistant:")


def main():
    # Load the tokenizer and model from the fine-tuned checkpoint directory.
    tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
    model = AutoModelForCausalLM.from_pretrained(MODEL_DIR)

    # Run on GPU when available, otherwise fall back to CPU.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)

    input_ids = tokenizer.encode(PROMPT, return_tensors="pt").to(device)

    # Draw three sampled continuations, capped at 40 new tokens each,
    # using combined top-k / nucleus sampling.
    outputs = model.generate(
        input_ids,
        max_length=input_ids.shape[-1] + 40,
        do_sample=True,
        top_k=50,
        top_p=0.95,
        temperature=0.9,
        num_return_sequences=3,
        pad_token_id=tokenizer.eos_token_id,
    )

    for i, out in enumerate(outputs):
        text = tokenizer.decode(out, skip_special_tokens=True)
        # Keep only the assistant's reply, dropping the echoed prompt.
        reply = text.split("Assistant:")[-1].strip()
        print(f"=== Sample {i+1} ===")
        print(reply)
        print()


if __name__ == "__main__":
    main()