"""Interactive mood-completion loop for the MoodChartAI/basicmood model.

Loads the base causal-LM on CPU, optionally applies a PEFT adapter, then
repeatedly reads a mood description from stdin and prints a short generated
completion. Runs until interrupted (Ctrl-C / EOF kills the input() loop).
"""

import gc
import os

import torch
from peft import PeftModel, PeftConfig  # PeftConfig kept: may be used elsewhere in the project
from transformers import AutoModelForCausalLM, AutoTokenizer

gc.collect()

model_name = "MoodChartAI/basicmood"
adapters_name = ""  # set to a PEFT adapter repo/path to enable adapter loading

# No-op when CUDA is unavailable/uninitialized; harmless on CPU-only hosts.
torch.cuda.empty_cache()

# HACK: forces swap to be flushed before loading the model. Requires root and
# is Linux-specific; shell-string command — NOTE(review): consider removing or
# replacing with subprocess.run([...], shell=False) if this must stay.
os.system("sudo swapoff -a; swapon -a")

print(f"Starting to load the model {model_name} into memory")

# CPU devices take no meaningful index; the original 'cpu:7' is rejected by
# torch.device validation.
m = AutoModelForCausalLM.from_pretrained(
    model_name,
    #load_in_4bit=True,
).to(device="cpu")

# Only attempt adapter loading when a path/repo is actually configured;
# PeftModel.from_pretrained("") always fails.
if adapters_name:
    print(f"Loading the adapters from {adapters_name}")
    m = PeftModel.from_pretrained(m, adapters_name)

tokenizer = AutoTokenizer.from_pretrained(
    "EleutherAI/gpt-neo-1.3B", trust_remote_code=True
)

while True:
    mood_input = input("Mood: ")
    inputs = tokenizer(
        "Prompt: %s Completions: You're feeling" % mood_input,
        return_tensors="pt",
        return_attention_mask=True,
    )
    # BatchEncoding.to() returns the moved encoding — the original discarded
    # the result (and used a bogus 'cpu:8' index).
    inputs = inputs.to(device="cpu")
    outputs = m.generate(**inputs, max_length=12)
    print(tokenizer.batch_decode(outputs)[0])