Alexandros Popov
committed on
Commit
·
20d32a0
1
Parent(s):
bde9b8e
first art director / critic discussion.
Browse files
agents.py
CHANGED
|
@@ -50,13 +50,21 @@ art_director_model = InferenceClientModel(
|
|
| 50 |
token=HUGGING_FACE_TOKEN,
|
| 51 |
)
|
| 52 |
art_director = CodeAgent(
|
| 53 |
-
tools=[jdg.propose_operations],
|
| 54 |
model=art_director_model,
|
| 55 |
managed_agents=[picture_operator],
|
| 56 |
-
name="
|
| 57 |
description=(
|
| 58 |
-
"
|
| 59 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
),
|
| 61 |
)
|
| 62 |
|
|
|
|
| 50 |
token=HUGGING_FACE_TOKEN,
|
| 51 |
)
|
| 52 |
# Orchestrating agent: it has the proposal and critic tools and delegates the
# actual image edits to the managed `picture_operator` agent.
art_director = CodeAgent(
    tools=[jdg.propose_operations, jdg.critic],
    model=art_director_model,
    managed_agents=[picture_operator],
    name="Manager",
    description=(
        # Adjacent string literals are concatenated with no separator, so every
        # sentence except the last ends with an explicit space; otherwise the
        # sentences run together ("...critic.You must...").
        "You manage the relations between the art director, the picture operator and the critic. "
        "You must present the images to improve to the art director, who will propose operations to apply to the image. "
        "You must then pass the operations to the picture operator, who will apply them to the image. "
        "Finally, you must present the resulting image to the critic, who will evaluate it and give feedback. "
        "You must then decide whether to continue the process or stop it based on the critic's feedback. "
        "You must not perform any operations on the image yourself, only pass them to the picture operator. "
        "You must not evaluate the image yourself, only pass it to the critic. "
        "You must not propose operations yourself, only pass them to the picture operator. "
        "You must not invent new methods or tools, only use the ones provided. "
        "If you need, simply ignore a specific operation."
    ),
)
|
| 70 |
|
judges.py
CHANGED
|
@@ -15,10 +15,20 @@ def pil_image_to_data_url(pil_image, format="JPEG"):
|
|
| 15 |
return f"data:{mime_type};base64,{base64_encoded_data}"
|
| 16 |
|
| 17 |
|
| 18 |
-
def call_to_llm(image_path, model, system_prompt=None, user_prompt=None):
|
| 19 |
img = Image.open(image_path)
|
| 20 |
data_url = pil_image_to_data_url(img, format=img.format)
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
client = OpenAI(
|
| 23 |
base_url="https://api.studio.nebius.com/v1/",
|
| 24 |
api_key=os.environ.get("NEBIUS_TOKEN"),
|
|
@@ -35,10 +45,7 @@ def call_to_llm(image_path, model, system_prompt=None, user_prompt=None):
|
|
| 35 |
},
|
| 36 |
{
|
| 37 |
"role": "user",
|
| 38 |
-
"content":
|
| 39 |
-
{"type": "text", "text": user_prompt},
|
| 40 |
-
{"type": "image_url", "image_url": {"url": data_url}},
|
| 41 |
-
],
|
| 42 |
},
|
| 43 |
],
|
| 44 |
temperature=0.6,
|
|
@@ -63,10 +70,25 @@ def propose_operations(image_path: str, user_prompt: str = "Improve this image."
|
|
| 63 |
|
| 64 |
system_prompt = (
|
| 65 |
"You are an AI art director. "
|
| 66 |
-
"Your task is to analyze the provided image and suggest
|
| 67 |
-
"the user happy. You must suggest applying filters
|
| 68 |
-
"
|
| 69 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
)
|
| 71 |
response = call_to_llm(
|
| 72 |
image_path, model="google/gemma-3-27b-it", system_prompt=system_prompt, user_prompt=user_prompt
|
|
@@ -74,6 +96,38 @@ def propose_operations(image_path: str, user_prompt: str = "Improve this image."
|
|
| 74 |
return response
|
| 75 |
|
| 76 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
if __name__ == "__main__":
|
| 78 |
res = propose_operations(image_path="small_test_image.jpg")
|
| 79 |
print(res["choices"][0]["message"]["content"])
|
|
|
|
| 15 |
return f"data:{mime_type};base64,{base64_encoded_data}"
|
| 16 |
|
| 17 |
|
| 18 |
+
def call_to_llm(image_path, model, system_prompt=None, user_prompt=None, second_image_path=None):
|
| 19 |
img = Image.open(image_path)
|
| 20 |
data_url = pil_image_to_data_url(img, format=img.format)
|
| 21 |
|
| 22 |
+
user_content = [
|
| 23 |
+
{"type": "text", "text": user_prompt},
|
| 24 |
+
{"type": "image_url", "image_url": {"url": data_url}},
|
| 25 |
+
]
|
| 26 |
+
|
| 27 |
+
if second_image_path:
|
| 28 |
+
img2 = Image.open(second_image_path)
|
| 29 |
+
data_url2 = pil_image_to_data_url(img2, format=img2.format)
|
| 30 |
+
user_content.append({"type": "image_url", "image_url": {"url": data_url2}})
|
| 31 |
+
|
| 32 |
client = OpenAI(
|
| 33 |
base_url="https://api.studio.nebius.com/v1/",
|
| 34 |
api_key=os.environ.get("NEBIUS_TOKEN"),
|
|
|
|
| 45 |
},
|
| 46 |
{
|
| 47 |
"role": "user",
|
| 48 |
+
"content": user_content,
|
|
|
|
|
|
|
|
|
|
| 49 |
},
|
| 50 |
],
|
| 51 |
temperature=0.6,
|
|
|
|
| 70 |
|
| 71 |
system_prompt = (
|
| 72 |
"You are an AI art director. "
|
| 73 |
+
"Your task is to analyze the provided image and suggest one operation to make"
|
| 74 |
+
"the user happy. You must suggest applying filters from the following list: "
|
| 75 |
+
"adjust_contrast"
|
| 76 |
+
"adjust_exposure"
|
| 77 |
+
"adjust_saturation"
|
| 78 |
+
"adjust_shadows_highlights"
|
| 79 |
+
"adjust_temperature"
|
| 80 |
+
"adjust_tint"
|
| 81 |
+
"adjust_hue_color"
|
| 82 |
+
"adjust_saturation_color"
|
| 83 |
+
"adjust_luminance_color"
|
| 84 |
+
"adjust_hsl_channel"
|
| 85 |
+
"add_vignette"
|
| 86 |
+
"denoise_image"
|
| 87 |
+
"add_grain"
|
| 88 |
+
r"In addition, you must suggest the amount of the operation to apply, in relative units : +10\% for instance."
|
| 89 |
+
"You must suggest only one operation at a time, and you must not invent new methods or tools."
|
| 90 |
+
"You must not perform any operations on the image yourself, only pass them to the picture operator."
|
| 91 |
+
"You must not evaluate the image yourself, only pass it to the critic."
|
| 92 |
)
|
| 93 |
response = call_to_llm(
|
| 94 |
image_path, model="google/gemma-3-27b-it", system_prompt=system_prompt, user_prompt=user_prompt
|
|
|
|
| 96 |
return response
|
| 97 |
|
| 98 |
|
| 99 |
+
@tool
def critic(new_image_path: str, old_image_path: str) -> str:
    """
    Evaluates the new image against the old image and provides feedback.

    Both images are sent to the vision model in a single request: the new
    image is attached first and the old image second, which is the order the
    system prompt describes to the model.

    Args:
        new_image_path (str): The file path to the new image.
        old_image_path (str): The file path to the old image.

    Returns:
        str: Feedback on the changes made to the image.
    """
    # Adjacent literals are concatenated without a separator, so each sentence
    # that is followed by another one must end with a space.
    system_prompt = (
        "You are an AI art critic. "
        "Your task is to evaluate the changes made to an image. "
        "The first image is the new one and the second image is the old one. "
        "Compare the new image with the old one and provide feedback on the changes. "
        "Your answer should be either 'good' or 'bad', "
        "indicating whether the changes made to the image are satisfactory or not."
    )
    user_prompt = "Evaluate the changes made to this image."

    # `second_image_path` is appended after the primary image in the user
    # message, so the new image must be passed as the positional image path.
    response = call_to_llm(
        new_image_path,
        model="google/gemma-3-27b-it",
        system_prompt=system_prompt,
        user_prompt=user_prompt,
        second_image_path=old_image_path,
    )
    # Return only the text of the first completion choice.
    return response["choices"][0]["message"]["content"]
|
| 129 |
+
|
| 130 |
+
|
| 131 |
if __name__ == "__main__":
    # Manual smoke test: request a proposal for the bundled sample image and
    # print the text of the first completion choice.
    result = propose_operations(image_path="small_test_image.jpg")
    first_choice = result["choices"][0]
    print(first_choice["message"]["content"])
|