Spaces:

Agents-MCP-Hackathon
/

AI-art-director

Sleeping

App Files Files Community

Alexandros Popov committed on Jun 4

Commit

20d32a0

1 Parent(s): bde9b8e

first art director / critic discussion.

Browse files

Files changed (2) hide show

agents.py +12 -4
judges.py +63 -9

agents.py CHANGED Viewed

@@ -50,13 +50,21 @@ art_director_model = InferenceClientModel(
     token=HUGGING_FACE_TOKEN,
 )
 art_director = CodeAgent(
-    tools=[jdg.propose_operations],
     model=art_director_model,
     managed_agents=[picture_operator],
-    name="ArtDirector",
     description=(
-        "Decides which filter to apply to the image in 'image_path'. "
-        "Give it your query as an argument, as well as the path to the image."
     ),
 )

     token=HUGGING_FACE_TOKEN,
 )
 art_director = CodeAgent(
+    tools=[jdg.propose_operations, jdg.critic],
     model=art_director_model,
     managed_agents=[picture_operator],
+    name="Manager",
     description=(
+        "You manage the relations between the art director, the picture operator and the critic."
+        "You must present the images to improve to the art director, who will propose operations to apply to the image."
+        "You must then pass the operations to the picture operator, who will apply them to the image."
+        "Finally, you must present the resulting image to the critic, who will evaluate it and give feedback."
+        "You must then decide whether to continue the process or stop it based on the critic's feedback."
+        "You must not perform any operations on the image yourself, only pass them to the picture operator."
+        "You must not evaluate the image yourself, only pass it to the critic."
+        "You must not propose operations yourself, only pass them to the picture operator."
+        "You must not invent new methods or tools, only use the ones provided."
+        "If you need, simply ignore a specific operation."
     ),
 )

judges.py CHANGED Viewed

@@ -15,10 +15,20 @@ def pil_image_to_data_url(pil_image, format="JPEG"):
     return f"data:{mime_type};base64,{base64_encoded_data}"
-def call_to_llm(image_path, model, system_prompt=None, user_prompt=None):
     img = Image.open(image_path)
     data_url = pil_image_to_data_url(img, format=img.format)
     client = OpenAI(
         base_url="https://api.studio.nebius.com/v1/",
         api_key=os.environ.get("NEBIUS_TOKEN"),
@@ -35,10 +45,7 @@ def call_to_llm(image_path, model, system_prompt=None, user_prompt=None):
             },
             {
                 "role": "user",
-                "content": [
-                    {"type": "text", "text": user_prompt},
-                    {"type": "image_url", "image_url": {"url": data_url}},
-                ],
             },
         ],
         temperature=0.6,
@@ -63,10 +70,25 @@ def propose_operations(image_path: str, user_prompt: str = "Improve this image."
     system_prompt = (
         "You are an AI art director. "
-        "Your task is to analyze the provided image and suggest a series of operations to make"
-        "the user happy. You must suggest applying filters, cropping, or other adjustments. "
-        "Provide a list of 5 different combinations of operations without explanations."
-        "No introductions, no conclusions, just the list of operations."
     )
     response = call_to_llm(
         image_path, model="google/gemma-3-27b-it", system_prompt=system_prompt, user_prompt=user_prompt
@@ -74,6 +96,38 @@ def propose_operations(image_path: str, user_prompt: str = "Improve this image."
     return response
 if __name__ == "__main__":
     res = propose_operations(image_path="small_test_image.jpg")
     print(res["choices"][0]["message"]["content"])

     return f"data:{mime_type};base64,{base64_encoded_data}"
+def call_to_llm(image_path, model, system_prompt=None, user_prompt=None, second_image_path=None):
     img = Image.open(image_path)
     data_url = pil_image_to_data_url(img, format=img.format)
+    user_content = [
+        {"type": "text", "text": user_prompt},
+        {"type": "image_url", "image_url": {"url": data_url}},
+    ]
+    if second_image_path:
+        img2 = Image.open(second_image_path)
+        data_url2 = pil_image_to_data_url(img2, format=img2.format)
+        user_content.append({"type": "image_url", "image_url": {"url": data_url2}})
     client = OpenAI(
         base_url="https://api.studio.nebius.com/v1/",
         api_key=os.environ.get("NEBIUS_TOKEN"),
             },
             {
                 "role": "user",
+                "content": user_content,
             },
         ],
         temperature=0.6,
     system_prompt = (
         "You are an AI art director. "
+        "Your task is to analyze the provided image and suggest one operation to make"
+        "the user happy. You must suggest applying filters from the following list: "
+        "adjust_contrast"
+        "adjust_exposure"
+        "adjust_saturation"
+        "adjust_shadows_highlights"
+        "adjust_temperature"
+        "adjust_tint"
+        "adjust_hue_color"
+        "adjust_saturation_color"
+        "adjust_luminance_color"
+        "adjust_hsl_channel"
+        "add_vignette"
+        "denoise_image"
+        "add_grain"
+        r"In addition, you must suggest the amount of the operation to apply, in relative units : +10\% for instance."
+        "You must suggest only one operation at a time, and you must not invent new methods or tools."
+        "You must not perform any operations on the image yourself, only pass them to the picture operator."
+        "You must not evaluate the image yourself, only pass it to the critic."
     )
     response = call_to_llm(
         image_path, model="google/gemma-3-27b-it", system_prompt=system_prompt, user_prompt=user_prompt
     return response
+@tool
+def critic(new_image_path: str, old_image_path: str) -> str:
+    """
+    Evaluates the new image against the old image and provides feedback.
+    Args:
+        new_image_path (str): The file path to the new image.
+        old_image_path (str): The file path to the old image.
+    Returns:
+        str: Feedback on the changes made to the image.
+    """
+    system_prompt = (
+        "You are an AI art critic. "
+        "Your task is to evaluate the changes made to an image. "
+        "The first image in the new one and the second image is the old one. "
+        "Compare the new image with the old one and provide feedback on the changes."
+        "Your answer should be either 'good' or 'bad', "
+        "indicating whether the changes made to the image are satisfactory or not."
+    )
+    user_prompt = "Evaluate the changes made to this image."
+    response = call_to_llm(
+        new_image_path,
+        model="google/gemma-3-27b-it",
+        system_prompt=system_prompt,
+        user_prompt=user_prompt,
+        second_image_path=old_image_path,
+    )
+    return response["choices"][0]["message"]["content"]
 if __name__ == "__main__":
+    # Manual check: request a proposal for a local sample image and print the reply text.
     res = propose_operations(image_path="small_test_image.jpg")
     print(res["choices"][0]["message"]["content"])