Alexandros Popov committed on
Commit
20d32a0
·
1 Parent(s): bde9b8e

first art director / critic discussion.

Browse files
Files changed (2) hide show
  1. agents.py +12 -4
  2. judges.py +63 -9
agents.py CHANGED
@@ -50,13 +50,21 @@ art_director_model = InferenceClientModel(
50
  token=HUGGING_FACE_TOKEN,
51
  )
52
  art_director = CodeAgent(
53
- tools=[jdg.propose_operations],
54
  model=art_director_model,
55
  managed_agents=[picture_operator],
56
- name="ArtDirector",
57
  description=(
58
- "Decides which filter to apply to the image in 'image_path'. "
59
- "Give it your query as an argument, as well as the path to the image."
 
 
 
 
 
 
 
 
60
  ),
61
  )
62
 
 
50
  token=HUGGING_FACE_TOKEN,
51
  )
52
  art_director = CodeAgent(
53
+ tools=[jdg.propose_operations, jdg.critic],
54
  model=art_director_model,
55
  managed_agents=[picture_operator],
56
+ name="Manager",
57
  description=(
58
+ "You manage the relations between the art director, the picture operator and the critic."
59
+ "You must present the images to improve to the art director, who will propose operations to apply to the image."
60
+ "You must then pass the operations to the picture operator, who will apply them to the image."
61
+ "Finally, you must present the resulting image to the critic, who will evaluate it and give feedback."
62
+ "You must then decide whether to continue the process or stop it based on the critic's feedback."
63
+ "You must not perform any operations on the image yourself, only pass them to the picture operator."
64
+ "You must not evaluate the image yourself, only pass it to the critic."
65
+ "You must not propose operations yourself, only pass them to the picture operator."
66
+ "You must not invent new methods or tools, only use the ones provided."
67
+ "If you need, simply ignore a specific operation."
68
  ),
69
  )
70
 
judges.py CHANGED
@@ -15,10 +15,20 @@ def pil_image_to_data_url(pil_image, format="JPEG"):
15
  return f"data:{mime_type};base64,{base64_encoded_data}"
16
 
17
 
18
- def call_to_llm(image_path, model, system_prompt=None, user_prompt=None):
19
  img = Image.open(image_path)
20
  data_url = pil_image_to_data_url(img, format=img.format)
21
 
 
 
 
 
 
 
 
 
 
 
22
  client = OpenAI(
23
  base_url="https://api.studio.nebius.com/v1/",
24
  api_key=os.environ.get("NEBIUS_TOKEN"),
@@ -35,10 +45,7 @@ def call_to_llm(image_path, model, system_prompt=None, user_prompt=None):
35
  },
36
  {
37
  "role": "user",
38
- "content": [
39
- {"type": "text", "text": user_prompt},
40
- {"type": "image_url", "image_url": {"url": data_url}},
41
- ],
42
  },
43
  ],
44
  temperature=0.6,
@@ -63,10 +70,25 @@ def propose_operations(image_path: str, user_prompt: str = "Improve this image."
63
 
64
  system_prompt = (
65
  "You are an AI art director. "
66
- "Your task is to analyze the provided image and suggest a series of operations to make"
67
- "the user happy. You must suggest applying filters, cropping, or other adjustments. "
68
- "Provide a list of 5 different combinations of operations without explanations."
69
- "No introductions, no conclusions, just the list of operations."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  )
71
  response = call_to_llm(
72
  image_path, model="google/gemma-3-27b-it", system_prompt=system_prompt, user_prompt=user_prompt
@@ -74,6 +96,38 @@ def propose_operations(image_path: str, user_prompt: str = "Improve this image."
74
  return response
75
 
76
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  if __name__ == "__main__":
78
  res = propose_operations(image_path="small_test_image.jpg")
79
  print(res["choices"][0]["message"]["content"])
 
15
  return f"data:{mime_type};base64,{base64_encoded_data}"
16
 
17
 
18
+ def call_to_llm(image_path, model, system_prompt=None, user_prompt=None, second_image_path=None):
19
  img = Image.open(image_path)
20
  data_url = pil_image_to_data_url(img, format=img.format)
21
 
22
+ user_content = [
23
+ {"type": "text", "text": user_prompt},
24
+ {"type": "image_url", "image_url": {"url": data_url}},
25
+ ]
26
+
27
+ if second_image_path:
28
+ img2 = Image.open(second_image_path)
29
+ data_url2 = pil_image_to_data_url(img2, format=img2.format)
30
+ user_content.append({"type": "image_url", "image_url": {"url": data_url2}})
31
+
32
  client = OpenAI(
33
  base_url="https://api.studio.nebius.com/v1/",
34
  api_key=os.environ.get("NEBIUS_TOKEN"),
 
45
  },
46
  {
47
  "role": "user",
48
+ "content": user_content,
 
 
 
49
  },
50
  ],
51
  temperature=0.6,
 
70
 
71
  system_prompt = (
72
  "You are an AI art director. "
73
+ "Your task is to analyze the provided image and suggest one operation to make"
74
+ "the user happy. You must suggest applying filters from the following list: "
75
+ "adjust_contrast"
76
+ "adjust_exposure"
77
+ "adjust_saturation"
78
+ "adjust_shadows_highlights"
79
+ "adjust_temperature"
80
+ "adjust_tint"
81
+ "adjust_hue_color"
82
+ "adjust_saturation_color"
83
+ "adjust_luminance_color"
84
+ "adjust_hsl_channel"
85
+ "add_vignette"
86
+ "denoise_image"
87
+ "add_grain"
88
+ r"In addition, you must suggest the amount of the operation to apply, in relative units : +10\% for instance."
89
+ "You must suggest only one operation at a time, and you must not invent new methods or tools."
90
+ "You must not perform any operations on the image yourself, only pass them to the picture operator."
91
+ "You must not evaluate the image yourself, only pass it to the critic."
92
  )
93
  response = call_to_llm(
94
  image_path, model="google/gemma-3-27b-it", system_prompt=system_prompt, user_prompt=user_prompt
 
96
  return response
97
 
98
 
99
@tool
def critic(new_image_path: str, old_image_path: str) -> str:
    """
    Evaluates the new image against the old image and provides feedback.

    Both images are sent to the vision model in a single request: the new
    image first, the old image second (via ``second_image_path``).

    Args:
        new_image_path (str): The file path to the new image.
        old_image_path (str): The file path to the old image.

    Returns:
        str: Feedback on the changes made to the image.
    """
    # Adjacent string literals are concatenated verbatim, so each fragment
    # carries its own trailing space to keep the sentences separated.
    system_prompt = (
        "You are an AI art critic. "
        "Your task is to evaluate the changes made to an image. "
        "The first image is the new one and the second image is the old one. "
        "Compare the new image with the old one and provide feedback on the changes. "
        "Your answer should be either 'good' or 'bad', "
        "indicating whether the changes made to the image are satisfactory or not."
    )
    user_prompt = "Evaluate the changes made to this image."

    # Image order matters: the system prompt tells the model that the first
    # image is the new one, so the new image is the primary argument.
    response = call_to_llm(
        new_image_path,
        model="google/gemma-3-27b-it",
        system_prompt=system_prompt,
        user_prompt=user_prompt,
        second_image_path=old_image_path,
    )
    # Assumes call_to_llm returns a dict-like chat-completion payload; the
    # module's __main__ block indexes the response the same way.
    return response["choices"][0]["message"]["content"]
129
+
130
+
131
  if __name__ == "__main__":
132
  res = propose_operations(image_path="small_test_image.jpg")
133
  print(res["choices"][0]["message"]["content"])