import time

from transformers import AutoModelForCausalLM, AutoTokenizer

# Path to the local directory containing the Mistral model files.
# A raw string keeps the Windows backslashes from being read as escape sequences.
model_path = r"B:\Arcee\mergekit\merged\Patent"

# Load Mistral model and tokenizer
model = AutoModelForCausalLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Function to generate a response and time how long generation takes
def generate_response(input_text):
    # Tokenize input text; keeping the attention mask avoids a warning
    # from generate() about having to infer which tokens are padding
    inputs = tokenizer(input_text, return_tensors="pt")

    start_time = time.time()

    # Generate response. do_sample=True is required for temperature and
    # top_p to take effect; without it they are silently ignored.
    # max_new_tokens bounds only the generated text, whereas max_length
    # also counts the prompt tokens.
    output = model.generate(
        **inputs,
        max_new_tokens=256,
        num_beams=5,
        do_sample=True,
        temperature=0.7,
        top_p=0.95,
        early_stopping=True,
    )

    end_time = time.time()

    # Decode response, skipping the prompt tokens so they are not echoed back
    response = tokenizer.decode(
        output[0][inputs.input_ids.shape[-1]:], skip_special_tokens=True
    )

    # Calculate time taken
    time_taken = end_time - start_time

    return response, time_taken

# Main loop for chatting
while True:
    user_input = input("You: ")
    if user_input.lower() == "exit":
        print("Goodbye!")
        break
    response, time_taken = generate_response(user_input)
    print("Bot:", response)
    print(f"Time taken: {time_taken:.2f} seconds")
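
# --- Optional: GPU loading sketch --------------------------------------
# A minimal sketch, assuming PyTorch with CUDA and the accelerate package
# are installed (neither is confirmed by the script above). Loading the
# weights in half precision and letting accelerate place them on the GPU
# makes the per-response timings printed by this script far more favorable
# than the default float32 CPU load:
#
#   import torch
#
#   model = AutoModelForCausalLM.from_pretrained(
#       model_path,
#       torch_dtype=torch.float16,  # half-precision weights: roughly half the memory
#       device_map="auto",          # let accelerate place layers on available devices
#   )
#
#   # The tokenized inputs must then live on the same device as the model:
#   inputs = tokenizer(input_text, return_tensors="pt").to(model.device)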
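
# --- Optional: chat templating sketch -----------------------------------
# A minimal sketch, assuming the merged model is instruction-tuned and its
# tokenizer ships a chat template (true for Mistral-Instruct-style models;
# whether this particular merge has one is an assumption). Wrapping the
# user turn in the template usually yields much better chat behavior than
# feeding raw text into generate():
#
#   messages = [{"role": "user", "content": user_input}]
#   input_ids = tokenizer.apply_chat_template(
#       messages,
#       add_generation_prompt=True,  # append the tokens that cue the assistant turn
#       return_tensors="pt",
#   )
#   output = model.generate(input_ids, max_new_tokens=256)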