hasanain9 committed on
Commit
177be6f
·
verified ·
1 Parent(s): 2794b4c
Files changed (1) hide show
  1. app.py +90 -5
app.py CHANGED
@@ -1,17 +1,20 @@
1
  import os
2
  import datetime
3
  import requests
4
- import gradio as gr
5
  import pandas as pd
 
6
  from openai import OpenAI
7
 
 
 
 
8
  class ToolEnhancedAgent:
9
  def __init__(self):
10
  api_key = os.getenv("OPENAI_API_KEY")
11
  if not api_key:
12
  raise ValueError("OPENAI_API_KEY is not set.")
13
  self.client = OpenAI(api_key=api_key)
14
- print("βœ… ToolEnhancedAgent initialized with GPT-4 + CoT + Tool usage.")
15
 
16
  def use_tool(self, tool_name: str, input_text: str) -> str:
17
  try:
@@ -38,8 +41,8 @@ class ToolEnhancedAgent:
38
 
39
  def __call__(self, question: str) -> str:
40
  prompt = (
41
- "You are a helpful AI assistant. Use tools when needed. "
42
- "Think step-by-step before answering. Respond clearly.\n\n"
43
  f"Question: {question}\n"
44
  "Answer (show thinking steps):"
45
  )
@@ -48,7 +51,7 @@ class ToolEnhancedAgent:
48
  response = self.client.chat.completions.create(
49
  model="gpt-4",
50
  messages=[
51
- {"role": "system", "content": "You are a smart assistant that can use tools and think step-by-step."},
52
  {"role": "user", "content": prompt}
53
  ],
54
  temperature=0.3,
@@ -60,3 +63,85 @@ class ToolEnhancedAgent:
60
  except Exception as e:
61
  print(f"[Agent Error]: {e}")
62
  return f"[Agent Error: {e}]"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import datetime
3
  import requests
 
4
  import pandas as pd
5
+ import gradio as gr
6
  from openai import OpenAI
7
 
8
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
9
+
10
+ # -------- Tool-Enhanced Agent --------
11
  class ToolEnhancedAgent:
12
  def __init__(self):
13
  api_key = os.getenv("OPENAI_API_KEY")
14
  if not api_key:
15
  raise ValueError("OPENAI_API_KEY is not set.")
16
  self.client = OpenAI(api_key=api_key)
17
+ print("βœ… ToolEnhancedAgent initialized with GPT-4 + CoT + Tools.")
18
 
19
  def use_tool(self, tool_name: str, input_text: str) -> str:
20
  try:
 
41
 
42
  def __call__(self, question: str) -> str:
43
  prompt = (
44
+ "You are a helpful AI assistant. You can use tools (calculator, date, wikipedia). "
45
+ "Think step-by-step before answering.\n\n"
46
  f"Question: {question}\n"
47
  "Answer (show thinking steps):"
48
  )
 
51
  response = self.client.chat.completions.create(
52
  model="gpt-4",
53
  messages=[
54
+ {"role": "system", "content": "You are a smart assistant that uses tools and thinks step-by-step."},
55
  {"role": "user", "content": prompt}
56
  ],
57
  temperature=0.3,
 
63
  except Exception as e:
64
  print(f"[Agent Error]: {e}")
65
  return f"[Agent Error: {e}]"
66
+
67
+ # -------- Evaluation & Submission Function --------
68
def run_and_submit_all(profile: gr.OAuthProfile | None) -> tuple[str, pd.DataFrame | None]:
    """Run the agent on every question from the scoring API and submit the answers.

    Args:
        profile: The logged-in Hugging Face profile, or ``None`` when the
            user has not logged in.

    Returns:
        A ``(status, table)`` pair. ``status`` is a human-readable message
        (success summary or error description). ``table`` is a DataFrame
        with one row per answered question, or ``None`` when the run
        stopped before any question was processed.
    """
    if not profile:
        return "Please login with your Hugging Face account.", None
    username = profile.username

    # NOTE(review): when SPACE_ID is unset this link contains the literal
    # text "None"; confirm whether the scoring service tolerates that.
    space_id = os.getenv("SPACE_ID")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    questions_url = f"{DEFAULT_API_URL}/questions"
    submit_url = f"{DEFAULT_API_URL}/submit"

    # The broad catches below are deliberate: this function is the UI
    # boundary, so every failure is reported as a status string instead of
    # propagating into the Gradio event handler.
    try:
        agent = ToolEnhancedAgent()
    except Exception as e:
        return f"Agent init error: {e}", None

    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions = response.json()
    except Exception as e:
        return f"Failed to fetch questions: {e}", None

    # Reject payloads the loop below cannot handle (e.g. an error object
    # or an empty list) instead of crashing or submitting nothing.
    if not isinstance(questions, list) or not questions:
        return "Fetched questions list is empty or has an unexpected format.", None

    results_log = []
    answers_payload = []

    for item in questions:
        if not isinstance(item, dict):
            continue
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or not question_text:
            continue
        try:
            answer = agent(question_text)
        except Exception as e:
            # Keep going: one failing question must not abort the whole run.
            answer = f"[Agent Error: {e}]"
        results_log.append(
            {"Task ID": task_id, "Question": question_text, "Submitted Answer": answer}
        )
        answers_payload.append({"task_id": task_id, "submitted_answer": answer})

    # Nothing usable was produced, so skip the pointless submission request.
    if not answers_payload:
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    submission = {
        "username": username,
        "agent_code": agent_code,
        "answers": answers_payload,
    }

    try:
        response = requests.post(submit_url, json=submission, timeout=60)
        response.raise_for_status()
        result = response.json()
        status = (
            f"✅ Submission Successful!\n"
            f"User: {result.get('username')}\n"
            f"Score: {result.get('score')}%\n"
            f"Correct: {result.get('correct_count')}/{result.get('total_attempted')}\n"
            f"Message: {result.get('message')}"
        )
    except Exception as e:
        status = f"❌ Submission failed: {e}"

    return status, pd.DataFrame(results_log)
126
+
127
+ # -------- Gradio Interface --------
128
# Build the single-page UI: a login button, a trigger button, and two
# outputs (status text and a table of the submitted answers).
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 GAIA Agent Evaluation with ToolEnhancedAgent")
    gr.Markdown(
        """
        - This Space lets you run your agent on GAIA benchmark.
        - Login with your HF account, click "Run Evaluation", and wait for the results.
        """
    )

    gr.LoginButton()
    run_button = gr.Button("🚀 Run Evaluation & Submit")
    status_output = gr.Textbox(label="Status / Score", lines=6, interactive=False)
    table_output = gr.DataFrame(label="Agent Answers")

    # No explicit inputs are wired here; run_and_submit_all declares a
    # gr.OAuthProfile | None parameter for the logged-in user.
    run_button.click(fn=run_and_submit_all, outputs=[status_output, table_output])
143
+
144
+ # -------- Launch App --------
145
# Start the Gradio server only when the file is executed as a script.
if __name__ == "__main__":
    print("✅ Launching GAIA Agent Evaluation App")
    demo.launch(debug=True)