import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


def chat_with_model(model_path: str):
    # Use the first GPU if CUDA is available, otherwise fall back to CPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Load the model and tokenizer
    model = AutoModelForCausalLM.from_pretrained(model_path).to(device)
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model.eval()

    # Wrap the model with DataParallel to use multiple GPUs
    if torch.cuda.is_available() and torch.cuda.device_count() > 1:
        print(f"Using {torch.cuda.device_count()} GPUs!")
        model = torch.nn.DataParallel(model)

    # DataParallel does not expose generate(), so call it on the underlying module
    generator = model.module if isinstance(model, torch.nn.DataParallel) else model

    print("You're now chatting with the model. Type 'quit' to exit.")

    while True:
        # Get user input
        input_text = input("You: ")
        if input_text.lower() == 'quit':
            break

        # Encode the input text and move it to the model's device
        input_ids = tokenizer.encode(input_text, return_tensors="pt").to(device)

        # Generate a response
        with torch.no_grad():
            generated_text_samples = generator.generate(
                input_ids,
                max_length=50,
                pad_token_id=tokenizer.eos_token_id,
            )

        # Decode and print the model's response
        response_text = tokenizer.decode(generated_text_samples[0], skip_special_tokens=True)
        print("AI:", response_text)


if __name__ == "__main__":
    model_path = '/home/energyxadmin/UI2/merge'
    chat_with_model(model_path)
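

# Alternative sketch (an assumption, not part of the original script): DataParallel is
# awkward for autoregressive generation, since it only parallelizes forward() and forces
# the .module indirection above. If the `accelerate` package is installed, transformers
# can instead shard the model across available GPUs at load time with device_map="auto".
# The function name below is hypothetical and reuses the imports at the top of this file.
def chat_with_model_sharded(model_path: str):
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    # device_map="auto" spreads the model's layers across available GPUs (and CPU if needed)
    model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto")
    model.eval()

    print("You're now chatting with the model. Type 'quit' to exit.")
    while True:
        input_text = input("You: ")
        if input_text.lower() == 'quit':
            break

        # Move inputs to the device that holds the model's first layers
        input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(model.device)

        with torch.no_grad():
            output = model.generate(
                input_ids,
                max_new_tokens=50,
                pad_token_id=tokenizer.eos_token_id,
            )

        print("AI:", tokenizer.decode(output[0], skip_special_tokens=True))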