File size: 5,878 Bytes
89f5b9b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 |
from autogen import AssistantAgent, UserProxyAgent, config_list_from_json
import autogen
import replicate
import requests
from datetime import datetime
import http.client
import json
import base64
config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST")
llm_config = {"config_list": config_list, "request_timeout": 120}
# function to use llava model to review image
def img_review(image_url, prompt):
data = {
"data": [
{
"image": "https://picsum.photos/200",
"features": [],
},
]}
headers = {
"x-api-key": "token 8uOw4ntevc8JKo0Q3tQq:2975e2827ebeb4e103f7b58c1410ba58fa47bc27b1302de614a000bf51bd2114",
"content-type": "application/json",
}
connection = http.client.HTTPSConnection("api.scenex.jina.ai")
connection.request("POST", "/v1/describe", json.dumps(data), headers)
response = connection.getresponse()
print(response.status, response.reason)
response_data = response.read().decode("utf-8")
print(response_data)
connection.close()
return response_data
result = img_review(
"https://cdn.discordapp.com/attachments/1083723388712919182/1089909178266558554/HannaD_A_captivating_digital_artwork_features_a_red-haired_girl_664d73dc-b537-490e-b044-4fbf22733559.png", "a llama driving a car")
print(result)
# def img_review(image_path, prompt):
# output = replicate.run(
# "yorickvp/llava-13b:6bc1c7bb0d2a34e413301fee8f7cc728d2d4e75bfab186aa995f63292bda92fc",
# input={
# "image": open(image_path, "rb"),
# "prompt": f"What is happening in the image? From scale 1 to 10, decide how similar the image is to the text prompt {prompt}?",
# }
# )
# result = ""
# for item in output:
# result += item
# return result
# function to use stability-ai model to generate image
def text_to_image_generation(prompt):
output = replicate.run(
"stability-ai/sdxl:c221b2b8ef527988fb59bf24a8b97c4561f1c671f73bd389f866bfb27c061316",
input={
"prompt": prompt
}
)
if output and len(output) > 0:
# Get the image URL from the output
image_url = output[0]
print(f"generated image for {prompt}: {image_url}")
# Download the image and save it with a filename based on the prompt and current time
current_time = datetime.now().strftime("%Y%m%d%H%M%S")
shortened_prompt = prompt[:50]
filename = f"imgs/{shortened_prompt}_{current_time}.png"
response = requests.get(image_url)
if response.status_code == 200:
with open(filename, "wb") as file:
file.write(response.content)
return f"Image saved as '{filename}'"
else:
return "Failed to download and save the image."
else:
return "Failed to generate the image."
# Create llm config
llm_config_assistants = {
"functions": [
{
"name": "text_to_image_generation",
"description": "use latest AI model to generate image based on a prompt, return the file path of image generated",
"parameters": {
"type": "object",
"properties": {
"prompt": {
"type": "string",
"description": "a great text to image prompt that describe the image",
}
},
"required": ["prompt"],
},
},
{
"name": "image_review",
"description": "review & critique the AI generated image based on original prompt, decide how can images & prompt can be improved",
"parameters": {
"type": "object",
"properties": {
"prompt": {
"type": "string",
"description": "the original prompt used to generate the image",
},
"image_path": {
"type": "string",
"description": "the image file path, make sure including the full file path & file extension",
}
},
"required": ["prompt", "image_path"],
},
},
],
"config_list": config_list,
"request_timeout": 120}
# Create assistant agent
img_gen_assistant = AssistantAgent(
name="text_to_img_prompt_expert",
system_message="You are a text to image AI model expert, you will use text_to_image_generation function to generate image with prompt provided, and also improve prompt based on feedback provided until it is 10/10.",
llm_config=llm_config_assistants,
function_map={
"image_review": img_review,
"text_to_image_generation": text_to_image_generation
}
)
img_critic_assistant = AssistantAgent(
name="img_critic",
system_message="You are an AI image critique, you will use img_review function to review the image generated by the text_to_img_prompt_expert against the original prompt, and provide feedback on how to improve the prompt.",
llm_config=llm_config_assistants,
function_map={
"image_review": img_review,
"text_to_image_generation": text_to_image_generation
}
)
# Create user proxy agent
user_proxy = UserProxyAgent(
name="user_proxy",
human_input_mode="ALWAYS",
)
# Create groupchat
groupchat = autogen.GroupChat(
agents=[user_proxy, img_gen_assistant, img_critic_assistant], messages=[], max_round=50)
manager = autogen.GroupChatManager(
groupchat=groupchat,
llm_config=llm_config)
# # Start the conversation
# user_proxy.initiate_chat(
# manager, message="Generate a photo realistic image of llama driving a car") |