# Example: drive a swarms MultiModalAgent through a text call, an image call,
# and a chat call, keeping each result in a module-level name.
from swarms.agents import MultiModalAgent

# Configuration dict handed to the agent constructor.
# NOTE(review): presumably maps a model/tool name to the device it is loaded
# on -- confirm against the swarms documentation.
load_dict = {"ImageCaptioning": "cuda"}

node = MultiModalAgent(load_dict)

# Text-only request.
text = node.run_text("What is your name? Generate a picture of yourself")

# Image request: "/image1" is passed first, followed by the question.
# NOTE(review): "/image1" looks like a placeholder image path -- verify.
img = node.run_img("/image1", "What is this image about?")

# Combined prompt sent through chat() with streaming=True.
chat = node.chat(
    "What is your name? Generate a picture of yourself. What is this image about?",
    streaming=True,
)