marluwe committed on
Commit
acfd37c
·
verified ·
1 Parent(s): 4969ca1

Upload 2 files

Browse files
Files changed (2) hide show
  1. agents.py +224 -0
  2. app.py +285 -9
agents.py ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from smolagents import CodeAgent, LiteLLMModel, load_tool, ToolCollection, HfApiModel, InferenceClientModel, TransformersModel, OpenAIServerModel
3
+ from smolagents import ToolCallingAgent, PythonInterpreterTool, tool, WikipediaSearchTool
4
+ from smolagents import DuckDuckGoSearchTool, FinalAnswerTool, VisitWebpageTool, SpeechToTextTool
5
+ from mcp import StdioServerParameters
6
+ from huggingface_hub import HfApi, login
7
+ from dotenv import load_dotenv
8
+ from typing import Optional
9
+ from models.gemini_model import GeminiModel
10
+ import requests
11
+ import re
12
+ import string
13
+ import random
14
+ import textwrap
15
+ import nltk
16
+ import spacy
17
+
18
# Base URL of the scoring API that serves questions and task files for the tools below.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
19
+
20
+
21
@tool
def download_file(task_id: str) -> str:
    """
    Returns the file path of the downloaded file.

    Args:
        task_id: the ID of the task to download the file for.
    """
    # FIX: a timeout prevents the agent from hanging forever on a dead endpoint.
    data = requests.get(f"{DEFAULT_API_URL}/files/{task_id}", timeout=30)
    if data.status_code == 200:
        # FIX: basename guards against path traversal should task_id ever
        # contain path separators (the id comes from an external API).
        file_path = f"/tmp/{os.path.basename(task_id)}"
        with open(file_path, "wb") as file:
            file.write(data.content)
        return file_path
    else:
        # keep the original exception type so existing callers still catch it
        raise Exception(f"Failed to download file: {data.status_code}")
38
+
39
@tool
def get_file_content_as_text(task_id: str) -> str:
    """
    Returns the content of the file as text.

    Args:
        task_id: the ID of the task to get the file content for.
    """
    # FIX: a timeout prevents the agent from hanging forever on a dead endpoint.
    data = requests.get(f"{DEFAULT_API_URL}/files/{task_id}", timeout=30)
    if data.status_code == 200:
        return data.text
    else:
        # keep the original exception type so existing callers still catch it
        raise Exception(f"Failed to get file content: {data.status_code}")
53
+
54
+
55
+
56
def load_hf_model(modelName: str):
    """
    Initialize a Hugging Face hosted model.

    :param modelName: name of the model on the Hub
    :return: model instance
    """
    load_dotenv()  # pick up credentials from the project's .env file
    # authenticate against Hugging Face with the token stored in .env
    login(token=os.getenv("hugging_face"))
    return HfApiModel(model_id=modelName)
69
+
70
+
71
def load_ollama_model(modelName: str):
    """
    Initialize a model served by a local Ollama instance.

    :param modelName: name of the model as known to Ollama
    :return: model instance
    """
    # Ollama exposes an OpenAI-compatible endpoint on port 11434.
    return OpenAIServerModel(model_id=modelName, api_base="http://localhost:11434/v1")
80
+
81
def load_lmStudio_model(modelName: str):
    """
    Initialize a model served by a local LM Studio instance.

    :param modelName: name of the model as loaded in LM Studio
    :return: model instance
    """
    # LM Studio serves an OpenAI-compatible API on port 1234.
    return OpenAIServerModel(model_id=modelName, api_base="http://localhost:1234/v1")
91
+
92
def load_gemini_model():
    """
    Initialize a Gemini model via LiteLLM.

    :return: model instance, or None if initialization fails
    """
    try:
        # FIX: never print the API key itself (it ends up in space logs);
        # only report whether it is present.
        if not os.getenv("GEMINI_API_KEY"):
            print("GEMINI_API_KEY is not set.")
        model = LiteLLMModel(model_id="gemini/gemini-2.0-flash-exp",
                             api_key=os.getenv("GEMINI_API_KEY"))
        return model
    except Exception as e:
        # best-effort: callers treat None as "provider unavailable"
        print("Error loading Gemini model:", e)
        return None
106
+
107
+
108
+
109
+ def get_agent(model_name:str, model_type:str) -> Optional[CodeAgent]:
110
+ # Modell initialisieren
111
+
112
+ match model_type:
113
+ case "hugging face":
114
+ model = load_hf_model(model_name)
115
+ case "Ollama":
116
+ model = load_ollama_model(model_name)
117
+ case "Gemini":
118
+ model = load_gemini_model()
119
+ case "LMStudio":
120
+ model = load_lmStudio_model(model_name)
121
+ case _:
122
+ print("Model type not supported.")
123
+ return None
124
+
125
+ #model = load_lmStudio_model("gemma-3-4b-it")
126
+ #model = load_gemini_model()
127
+ #mopip del = HfApiModel()
128
+ #model=InferenceClientModel(model_id="meta-llama/Meta-Llama-3.1-8B-Instruct")
129
+ #model = TransformersModel(model_id="HuggingFaceTB/SmolLM-135M-Instruct")
130
+ # Tools laden
131
+ web_search_tool = DuckDuckGoSearchTool()
132
+ final_answer_tool = FinalAnswerTool()
133
+ visit_webpage_tool = VisitWebpageTool()
134
+
135
+ #speech_to_text_tool = SpeechToTextTool()
136
+ #transcript_tool = load_tool("maguid28/TranscriptTool", trust_remote_code=True)
137
+
138
+ #mcp_tool_collection = ToolCollection.from_mcp(server_parameters, trust_remote_code=True)
139
+ #with ToolCollection.from_mcp(server_parameters, trust_remote_code=True) as tool_collection:
140
+ # mcp_tool_agent = CodeAgent(tools=[*tool_collection.tools], add_base_tools=True)
141
+
142
+ #server_parameters = StdioServerParameters(
143
+ # command="uv",
144
+ # args=["--quiet", "pubmedmcp@0.1.3"],
145
+ # env={"UV_PYTHON": "3.12", **os.environ},
146
+ #)
147
+ #
148
+ #with ToolCollection.from_mcp(server_parameters, trust_remote_code=True) as tool_collection:
149
+ # mcp_agent = CodeAgent(tools=[*tool_collection.tools], model=model, add_base_tools=True)
150
+
151
+ variation_agent = CodeAgent(
152
+ model=model,
153
+ tools=[PythonInterpreterTool()],
154
+ name="variation_agent",
155
+ description="Get the user question and checks if the given question makes sense at all, if not, we try to modify the text like reverse. Provide the content / the questin as the 'task' argument." \
156
+ "The agent can write professional python code, focused on modifiying texts." \
157
+ "It has access to the following libraries: re, string, random, textwrap, nltk and spacy." \
158
+ "The goal is to find out, if a user question is a trick, and we might modify the content.",
159
+ additional_authorized_imports=[
160
+ "re",
161
+ "string",
162
+ "random",
163
+ "textwrap",
164
+ "nltk",
165
+ "spacy"
166
+ ]
167
+ )
168
+ variation_agent.system_prompt = "You are a text variation agent. You can write professional python code, focused on modifiying texts." \
169
+ "You can use the following libraries: re, string, random, textwrap, nltk and spacy." \
170
+ "Your goal is to find out, if a user question is a trick, and we might modify the content."
171
+
172
+ code_agent = CodeAgent(
173
+ name="code_agent",
174
+ description="Can generate code an run it. It provides the possibility to download additional files if needed.",
175
+ model=model,
176
+ tools=[download_file, PythonInterpreterTool(), get_file_content_as_text],
177
+ additional_authorized_imports=[
178
+ "geopandas",
179
+ "plotly",
180
+ "shapely",
181
+ "json",
182
+ "pandas",
183
+ "numpy",
184
+ ],
185
+ verbosity_level=2,
186
+ #final_answer_checks=[FinalAnswerTool()],
187
+ max_steps=5,
188
+ )
189
+
190
+ final_answer_tool = FinalAnswerTool()
191
+ final_answer_tool.description = "You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."
192
+
193
+ tool_agent = CodeAgent(
194
+ model=model,
195
+ tools=[web_search_tool, visit_webpage_tool, WikipediaSearchTool(), final_answer_tool],
196
+ verbosity_level=2,
197
+ max_steps=15,
198
+ managed_agents=[code_agent, variation_agent],
199
+ planning_interval=5,
200
+ )
201
+
202
+ return tool_agent
203
+ # return tool_agent
204
+
205
+ manager_agent = CodeAgent(
206
+ #model=HfApiModel("deepseek-ai/DeepSeek-R1", provider="together", max_tokens=8096),
207
+ model=model,
208
+ tools=[web_search_tool, visit_webpage_tool],
209
+ # managed_agents=[mcp_tool_agent],
210
+ additional_authorized_imports=[
211
+ "geopandas",
212
+ "plotly",
213
+ "shapely",
214
+ "json",
215
+ "pandas",
216
+ "numpy",
217
+ ],
218
+ planning_interval=5,
219
+ verbosity_level=2,
220
+ #final_answer_checks=[FinalAnswerTool()],
221
+ max_steps=15
222
+ )
223
+
224
+ return manager_agent
app.py CHANGED
@@ -3,11 +3,60 @@ import gradio as gr
3
  import requests
4
  import inspect
5
  import pandas as pd
 
 
 
 
6
 
7
  # (Keep Constants as is)
8
  # --- Constants ---
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  # --- Basic Agent Definition ---
12
  # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
13
  class BasicAgent:
@@ -19,11 +68,135 @@ class BasicAgent:
19
  print(f"Agent returning fixed answer: {fixed_answer}")
20
  return fixed_answer
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  def run_and_submit_all( profile: gr.OAuthProfile | None):
23
  """
24
  Fetches all questions, runs the BasicAgent on them, submits all answers,
25
  and displays the results.
26
  """
 
 
27
  # --- Determine HF Space Runtime URL and Repo URL ---
28
  space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
29
 
@@ -140,37 +313,140 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
140
  return status_message, results_df
141
 
142
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  # --- Build Gradio Interface using Blocks ---
144
  with gr.Blocks() as demo:
145
  gr.Markdown("# Basic Agent Evaluation Runner")
 
 
 
146
  gr.Markdown(
147
  """
148
  **Instructions:**
149
 
150
- 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
151
- 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
152
- 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
153
-
154
- ---
155
- **Disclaimers:**
156
- Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
157
- This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
158
  """
159
  )
160
 
161
- gr.LoginButton()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
 
163
  run_button = gr.Button("Run Evaluation & Submit All Answers")
164
 
 
 
 
 
 
 
165
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
166
  # Removed max_rows=10 from DataFrame constructor
167
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
168
 
 
 
 
 
 
 
 
 
 
 
169
  run_button.click(
170
  fn=run_and_submit_all,
171
  outputs=[status_output, results_table]
172
  )
173
 
 
 
 
 
 
174
  if __name__ == "__main__":
175
  print("\n" + "-"*30 + " App Starting " + "-"*30)
176
  # Check for SPACE_HOST and SPACE_ID at startup for information
 
3
  import requests
4
  import inspect
5
  import pandas as pd
6
+ from agents import agents
7
+ from PIL import Image
8
+ from io import BytesIO
9
+ import whisper
10
 
11
# (Keep Constants as is)
# --- Constants ---
# Base URL of the scoring API (questions, task files, submissions).
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
14
 
15
# --- Load Agent ---
# 1. Instantiate Agent ( modify this part to create your agent)

# Currently selected agent instance; replaced at runtime by select_agent().
agent = None
19
+
20
def select_agent(provider_name:str, model_name: str):
    """
    Selects and builds the agent for the chosen provider/model combination.

    Falls back to BasicAgent when no agent can be built.

    :param provider_name: Name of the model provider (e.g. "Ollama", "Gemini").
    :param model_name: Name of the model to load for that provider.
    :return: The selected agent instance.
    """
    global agent
    try:
        agent = agents.get_agent(model_name=model_name, model_type=provider_name)
        if agent is None:
            print(f"Agent not found for provider: {provider_name} and model: {model_name}")
            agent = BasicAgent()
    except Exception as e:
        print(f"Error selecting agent: {e}")
        agent = BasicAgent()
    # Update ui to indicate the selected agent
    # NOTE(review): BasicAgent does not define a .model attribute in SOURCE —
    # this print may raise AttributeError on the fallback path; confirm.
    print(f"Agent selected: {agent.model}")
    # NOTE(review): assigning .value after the Blocks are built does not
    # refresh the rendered component — returning the text as an event output
    # is the usual gradio pattern; confirm intent.
    agent_info_text.value = get_agent_info()
    return agent
39
+
40
+
41
def get_agent_info() -> str:
    """
    Build a human-readable summary (class, model, docstring) of the
    currently selected global agent.
    """
    global agent
    if agent is None:
        return "No agent selected."
    try:
        cls_name = agent.__class__.__name__
        current_model = agent.model
        doc = inspect.getdoc(agent)
        return f"Agent Class: {cls_name}\nModel Name: {current_model}\nDocstring: {doc}"
    except Exception as e:
        # best-effort: a broken agent object must not crash the UI
        print(f"Error getting agent info: {e}")
        return "Error getting agent info."
58
+
59
+
60
  # --- Basic Agent Definition ---
61
  # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
62
  class BasicAgent:
 
68
  print(f"Agent returning fixed answer: {fixed_answer}")
69
  return fixed_answer
70
 
71
+
72
def get_all_questions():
    """
    Fetches all available questions from the API.

    Generator wrapper: forwards the (status, dataframe) yields of
    run_test_on_questions with run_agent=False, so answers stay placeholders.
    """
    yield from run_test_on_questions(False, False)
77
+
78
def run_test_on_all_questions():
    """
    Runs tests on all available questions by forwarding yields from run_test_on_questions.

    Generator wrapper: run_agent=True, use_random_question=False.
    """
    yield from run_test_on_questions(False, True)
83
+
84
def run_test_on_random_question():
    """
    Runs a single test on a random available question by forwarding yields from run_test_on_questions.

    Generator wrapper: run_agent=True, use_random_question=True.
    """
    yield from run_test_on_questions(True, True)
89
+
90
+
91
def run_test_on_questions(use_random_question: bool, run_agent: bool):
    """
    Fetch questions from the API and optionally run the selected agent on them.

    Generator: yields (status_text, results_dataframe) tuples so the gradio
    UI can stream progress while the run is ongoing.

    :param use_random_question: fetch one random question instead of all
    :param run_agent: actually run the agent (False -> placeholder answers)
    """

    global agent
    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/random-question" if use_random_question else f"{api_url}/questions"

    info = "# started request"
    yield info, None
    # 2. Fetch Questions
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_dataset_raw = response.json()
        # the random-question endpoint returns a single object, not a list
        questions_dataset = [questions_dataset_raw] if use_random_question else questions_dataset_raw
        yield info, None
        if not questions_dataset:
            print("Fetched questions list is empty.")
            yield info + "\n\nFetched questions list is empty or invalid format.", None
            return
        print(f"Fetched {len(questions_dataset)} questions.")
    except requests.exceptions.RequestException as e:
        print(f"Error fetching questions: {e}")
        yield f"Error fetching questions: {e}", None
        return
    except requests.exceptions.JSONDecodeError as e:
        print(f"Error decoding JSON response from questions endpoint: {e}")
        print(f"Response text: {response.text[:500]}")
        yield f"Error decoding server response for questions: {e}", None
        return
    except Exception as e:
        print(f"An unexpected error occurred fetching questions: {e}")
        yield f"An unexpected error occurred fetching questions: {e}", None
        return

    # 3. Run your Agent
    results_log = []
    answers_payload = []
    # loop over all questions
    for i, questions_data in enumerate(questions_dataset):

        # FIX: the BasicAgent fallback has no .memory attribute, so guard the
        # reset instead of crashing every run when no smolagents agent is set.
        if hasattr(agent, "memory"):
            agent.memory.reset()
        images = []
        task_id = questions_data.get("task_id")
        question_text = questions_data.get("question")
        file_name = questions_data.get("file_name")
        if (file_name != "" and file_name is not None):
            question_text = question_text + f"\n\nYou can download the correspondig file using the download tool with the task id: {task_id}."
            # FIX: added a timeout so a dead file endpoint cannot hang the run
            fileData = requests.get(f"{DEFAULT_API_URL}/files/{task_id}", timeout=30)
            # check if file is an image -> pass it to the agent directly
            if fileData.headers['Content-Type'] in ['image/png', 'image/jpeg']:
                image = Image.open(BytesIO(fileData.content)).convert("RGB")
                images = [image]
            if fileData.headers['Content-Type'] in ['audio/mpeg', 'audio/wav']:
                # Load the audio file using Whisper
                model = whisper.load_model("base")
                # persist the payload so whisper can read it from disk
                with open("temp_audio.mp3", "wb") as f:
                    f.write(fileData.content)

                # run the transcription and append it to the question text
                audioContent = model.transcribe("temp_audio.mp3")
                question_text = question_text + f"\n\nTranscription: {audioContent['text']}"
        info += f"\n\nRunning agent on question {i+1}/{len(questions_dataset)}:\n - task_id: {task_id}\n - question: {question_text}"
        yield info, None
        if not task_id or question_text is None:
            yield info + f"\nError in question data: {questions_data}", None
            return
        try:
            submitted_answer = agent.run(question_text, images=images) if run_agent else "-- no agent interaction --"
            info += f"\n - got answer {submitted_answer}"
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer, "FileInfo": file_name})
        except Exception as e:
            # record the failure but keep processing the remaining questions
            print(f"Error running agent on task {task_id}: {e}")
            results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}", "FileInfo": file_name})

    if not answers_payload:
        print("Agent did not produce any answers.")
        yield info + "\nAgent did not produce any answers.", pd.DataFrame(results_log)
        return

    # 5. Submit (currently only displays the collected answers)
    try:
        results_df = pd.DataFrame(results_log)
        yield info + "\nGot an answer from agent", results_df
    except Exception as e:
        status_message = f"An unexpected error occurred during submission: {e}"
        print(status_message)
        results_df = pd.DataFrame(results_log)
        yield status_message, results_df
        return
190
+
191
+
192
+
193
  def run_and_submit_all( profile: gr.OAuthProfile | None):
194
  """
195
  Fetches all questions, runs the BasicAgent on them, submits all answers,
196
  and displays the results.
197
  """
198
+
199
+ return "We are not there yet", None
200
  # --- Determine HF Space Runtime URL and Repo URL ---
201
  space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
202
 
 
313
  return status_message, results_df
314
 
315
 
316
def fetch_ollama_models() -> list:
    """
    Fetches available models from the Ollama server.
    :return: List of available models, or ["None"] when unreachable.
    """
    try:
        # FIX: a timeout keeps the UI responsive when no local Ollama
        # server is running (the request otherwise blocks indefinitely).
        response = requests.get("http://localhost:11434/api/tags", timeout=5)
        response.raise_for_status()
        data = response.json()
        return [model["name"] for model in data["models"]]
    except requests.exceptions.RequestException as e:
        print(f"Error fetching Ollama models: {e}")
        return ["None"]
329
def fetch_lmstudio_models() -> list:
    """
    Fetches available models from the LM Studio server.
    :return: List of available models, or ["None"] when unreachable.
    """
    try:
        # FIX: a timeout keeps the UI responsive when no local LM Studio
        # server is running (the request otherwise blocks indefinitely).
        response = requests.get("http://localhost:1234/v1/models", timeout=5)
        response.raise_for_status()
        data = response.json()
        return [model["id"] for model in data["data"]]
    except requests.exceptions.RequestException as e:
        print(f"Error fetching LM Studio models: {e}")
        return ["None"]
342
+
343
+
344
# Global list backing the model dropdown; refreshed by update_available_models().
available_models = ["None"]
345
+
346
def update_available_models(provider: str):
    """
    Fetches available models based on the selected provider.
    :param provider: The selected provider name.
    :return: Update object for the model dropdown.
    """
    global available_models
    print(f"Selected provider: {provider}")

    # Providers with fixed model lists; server-backed providers are
    # queried lazily below so no request fires for static choices.
    static_choices = {
        "hugging face": ["None", "", "QWEN-2-instruct"],
        "Gemini": ["None", "Gemini-2.0-flash-exp", "Gemini-2.0-flash-lite"],
        "Anthropic": ["None", "Claude-3"],
        "OpenAI": ["None", "GPT-4", "GPT-3.5-turbo"],
        "Basic Agent": ["None"],
    }
    if provider == "Ollama":
        available_models = fetch_ollama_models()
    elif provider == "LMStudio":
        available_models = fetch_lmstudio_models()
    else:
        # unknown providers fall back to ["None"], matching the old wildcard
        available_models = static_choices.get(provider, ["None"])

    print(f"Available models for {provider}: {available_models}")

    return gr.Dropdown(choices=available_models)
377
+
378
+
379
+
380
# --- Build Gradio Interface using Blocks ---
with gr.Blocks() as demo:
    gr.Markdown("# Basic Agent Evaluation Runner")

    # Read-only textbox showing which agent/model is currently selected.
    agent_info_text = gr.Text(label="Agent Name", value=get_agent_info(), interactive=False, visible=True)

    gr.Markdown(
        """
        **Instructions:**

        Select a provider and then model to generate the agent.
        """
    )

    # Provider dropdown; choosing one repopulates the model dropdown below.
    provider_select = gr.Dropdown(
        label="Select Provider",
        choices=["Basic Agent", "LMStudio", "Ollama", "hugging face", "Gemini", "Anthropic", "OpenAI"],
        interactive=True,
        visible=True,
        multiselect=False)

    model_select = gr.Dropdown(
        label="Select Model",
        choices=available_models,
        interactive=True,
        visible=True,
        multiselect=False)

    # changing the provider will change the available models
    provider_select.input(fn=update_available_models, inputs=provider_select, outputs=[model_select])

    # changing a model will update the agent (see select_agent)
    model_select.change(fn=select_agent, inputs=[provider_select, model_select])

    # in case of running on HF space, we support the login button
    # we somehow need to find out, if this is running on HF space or not
    #gr.LoginButton()

    run_button = gr.Button("Run Evaluation & Submit All Answers")

    run_test_button = gr.Button("Run Test on Random Question")

    run_multiple_tests_button = gr.Button("Run tests on all questions")

    run_get_questions_button = gr.Button("Get Questions")

    status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    # Removed max_rows=10 from DataFrame constructor
    results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # Wire each button to its handler; the generator handlers stream
    # (status_text, dataframe) updates into the two outputs above.
    run_test_button.click(
        fn=run_test_on_random_question,
        outputs=[status_output, results_table]
    )

    run_multiple_tests_button.click(
        fn=run_test_on_all_questions,
        outputs=[status_output, results_table]
    )

    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table]
    )

    run_get_questions_button.click(
        fn=get_all_questions,
        outputs=[status_output, results_table]
    )
449
+
450
  if __name__ == "__main__":
451
  print("\n" + "-"*30 + " App Starting " + "-"*30)
452
  # Check for SPACE_HOST and SPACE_ID at startup for information