jkorstad committed
Commit 903bdac · verified · 1 Parent(s): 1ab5d95

Update app.py

Files changed (1)
  1. app.py +29 -22
app.py CHANGED
@@ -108,7 +108,6 @@ tools.append(space_search_tool)
 model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct")
 
 # Create the agent
-# Removed python_globals from constructor
 agent = CodeAgent(
     tools=tools,
     model=model,
@@ -117,6 +116,7 @@ agent = CodeAgent(
 )
 
 AGENT_INSTRUCTIONS = """You are a highly capable AI assistant. Your primary goal is to accomplish tasks using a variety of tools, prioritizing Hugging Face Spaces.
+
 Follow these steps:
 1. **Understand the Request:** Carefully analyze the user's prompt. Identify the core task and any specific requirements or inputs.
 2. **Check Predefined Tools:** Review your list of available tools. If a predefined tool can directly address the request, use it.
@@ -128,10 +128,12 @@ Follow these steps:
 5. **Execute the Tool:** Call the tool (predefined, or dynamically created via `Tool.from_space()`) with the necessary arguments.
    * **File Inputs:** If the user uploads files, their paths will be available as global string variables: `input_image_path`, `input_audio_path`, `input_video_path`, `input_3d_model_path`, `input_file_path`. Before using these variables, check if they exist and are not None. Pass these file paths as arguments to tools that require them.
    * **Imports in Generated Code:** If your code block for execution uses modules like `os` or `uuid`, **you must include the import statements (e.g., `import os`, `import uuid`) within that specific code block.**
-6. **Output Management:**
-   * **If a tool returns a filepath string (e.g., to an image, audio, or other file), your final answer for this step should usually be that direct filepath string.** Do NOT attempt to re-save the file using `os.path.join` or `image.save()` unless you are performing an explicit transformation on the file content that requires loading and then saving. The system is designed to handle these returned filepaths.
-   * If a tool returns text, return that text.
+6. **Output Management & Concluding a Step:**
+   * When your code block for a step is complete and has a result (e.g., a text string, a filepath from a tool), use the `return` statement (e.g., `return my_result_variable`).
+   * The system will use this returned value. You might see "ReturnException" in system logs; this is a normal part of a successful `return` and not an error you need to act upon. Based on the returned value, decide on your next action or if the task is complete.
+   * **If the entire user request is satisfied by the value you are returning, that `return` statement concludes your work for the current task.** You do not need to call `final_answer()` yourself; the system handles this based on your `return`.
 7. **Clarity and Error Handling:** If you encounter issues (e.g., a Space tool fails, required inputs are missing), clearly explain the problem in your response. If a Space doesn't work, try to explain why or suggest an alternative if possible.
+
 Example of the **CORRECT AND PREFERRED** way to use a discovered Space:
 ```python
 # User prompt: "Find a space that can make an image of a cat and use it."
@@ -146,20 +148,21 @@ Example of the **CORRECT AND PREFERRED** way to use a discovered Space:
 # # Now use the newly created tool. Arguments depend on the Space's API.
 # # Let's assume it takes a 'prompt'.
 # image_filepath = cat_tool(prompt="A fluffy siamese cat, cyberpunk style")
-# return image_filepath # Return the filepath directly
+# return image_filepath # Return the filepath directly. This is the final result for this task.
 # except Exception as e:
 #     print(f"Failed to create or use tool from Space 'someuser/cat-image-generator' using Tool.from_space(): {e}")
 # # If Tool.from_space() fails, DO NOT immediately try gradio_client.Client().
 # # Instead, consider another space or a predefined tool.
 # # return "Could not use the discovered space via Tool.from_space(). Trying a fallback..." (then try another step)
 ```
+
 Example of using a predefined tool that returns a filepath:
 ```python
 # User prompt: "Generate an image of a happy robot."
 # (Assuming 'image_generator_flux_schnell' is a predefined tool)
 #
 # image_filepath = image_generator_flux_schnell(prompt="A happy robot coding on a laptop, cyberpunk style")
-# return image_filepath # Return the filepath string directly.
+# return image_filepath # Return the filepath string directly. This is the final result for this task.
 ```
 Always ensure your generated Python code is complete and directly callable.
 You have access to `PIL.Image` (as `Image`), `os`, `sys`, `numpy`, `huggingface_hub`, `gradio_client`, `uuid`. Remember to import them if you use them in a code block.
@@ -168,7 +171,8 @@ You have access to `PIL.Image` (as `Image`), `os`, `sys`, `numpy`, `huggingface_
 # Gradio interface function
 def gradio_interface(user_prompt, input_image_path, input_audio_path, input_video_path, input_3d_model_path, input_file_path, progress=gr.Progress(track_tqdm=True)):
     try:
-        progress(0, desc="Initializing Agent...")
+        progress(0, desc="Initializing...") # Step 0
+        print("Progress: 0% - Initializing...")
         full_prompt_with_instructions = f"{AGENT_INSTRUCTIONS}\n\nUSER PROMPT: {user_prompt}"
 
         dynamic_globals_for_run = {}
@@ -210,9 +214,7 @@ def gradio_interface(user_prompt, input_image_path, input_audio_path, input_vide
             print(f"Restored agent.python_interpreter.globals.")
         else:
             print("Warning: Could not restore python_interpreter globals.")
-
-
-        progress(0.8, desc="Processing result...")
+
         outputs = {
             "image": gr.update(value=None, visible=False), "file": gr.update(value=None, visible=False),
             "path": gr.update(value=None, visible=False), "audio": gr.update(value=None, visible=False),
@@ -233,13 +235,15 @@ def gradio_interface(user_prompt, input_image_path, input_audio_path, input_vide
         elif result is None: outputs["text"] = gr.update(value="Agent returned no result (None).", visible=True)
         else: outputs["text"] = gr.update(value=f"Unexpected result type: {type(result)}. Content: {str(result)}", visible=True)
 
-        progress(1, desc="Done!")
+        progress(1, desc="Done!") # Step 3: All processing finished
+        print("Progress: 100% - Done!")
         return (outputs["image"], outputs["file"], outputs["path"], outputs["audio"], outputs["model3d"], outputs["text"])
 
     except Exception as e:
         error_msg = f"An error occurred: {str(e)}"
         print(error_msg)
         traceback.print_exc()
+        progress(1, desc="Error occurred.") # Ensure progress completes on error
         return (None, None, None, None, None, gr.update(value=error_msg, visible=True))
 
 # Create the Gradio app
@@ -251,14 +255,18 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
     prompt_input = gr.Textbox(label="Enter your prompt", placeholder="e.g., 'Generate an image of a futuristic city'", lines=3, elem_id="user_prompt_textbox")
 
     with gr.Accordion("Optional File Inputs", open=False):
-        with gr.Row():
-            input_image = gr.Image(label="Image Input", type="filepath", sources=["upload", "clipboard"], elem_id="input_image_upload")
-            input_audio = gr.Audio(label="Audio Input", type="filepath", sources=["upload", "microphone"], elem_id="input_audio_upload")
-        with gr.Row():
-            input_video = gr.Video(label="Video Input", sources=["upload"], elem_id="input_video_upload")
-            input_model3d = gr.Model3D(label="3D Model Input", elem_id="input_model3d_upload")
-        with gr.Row():
-            input_file = gr.File(label="Generic File Input", type="filepath", elem_id="input_file_upload")
+        # Using gr.Group for better visual separation of input groups
+        with gr.Group():
+            with gr.Row():
+                input_image = gr.Image(label="Image Input", type="filepath", sources=["upload", "clipboard"], elem_id="input_image_upload")
+                input_audio = gr.Audio(label="Audio Input", type="filepath", sources=["upload", "microphone"], elem_id="input_audio_upload")
+        with gr.Group():
+            with gr.Row():
+                input_video = gr.Video(label="Video Input", sources=["upload"], elem_id="input_video_upload")
+                input_model3d = gr.Model3D(label="3D Model Input", elem_id="input_model3d_upload")
+        with gr.Group():
+            with gr.Row():
+                input_file = gr.File(label="Generic File Input (PDF, TXT, etc.)", type="filepath", elem_id="input_file_upload")
 
     submit_button = gr.Button("🚀 Generate", variant="primary", elem_id="submit_button_generate")
 
@@ -283,11 +291,10 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
         examples=[
            ["Generate an image of a happy robot coding on a laptop, cyberpunk style.", None, None, None, None, None],
            ["Convert the following text to speech: 'Smolagents are amazing for building AI applications.'", None, None, None, None, None],
-           ["Search for a Hugging Face Space that can perform image captioning. Describe the first result.", None, None, None, None, None],
+           ["Search for a Hugging Face Space that can perform image captioning. Describe the Caption the following image.", "Happy Robot Coding.webp", None, None, None, None],
            ["I have an image of a robot. Make this image Ghibli style.", "Happy Robot Coding.webp", None, None, None, None],
            ["Generate an EDM jazz song about a futuristic city.", None, None, None, None, None],
-           ["Extract text from the uploaded PDF file. (Upload a PDF)", None, None, None, None, None], # User would replace path or upload
-           ["Search for a Hugging Face Space that can translate English to Spanish, then use it to translate: 'Good morning, how are you?'", None, None, None, None, None],
+           ["Generate audio of a dog barking.", None, None, None, None, None],
         ],
         inputs=[prompt_input, input_image, input_audio, input_video, input_model3d, input_file],
         label="Example Prompts (Note: For examples with file inputs, you'll need to upload a relevant file first or ensure the named file exists in the Space's root)"