hasanain9 committed on
Commit
393a8af
·
verified ·
1 Parent(s): 3628aaf
Files changed (1) hide show
  1. app.py +57 -78
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import os
2
- import datetime
3
  import requests
 
4
  import pandas as pd
5
  import gradio as gr
6
  from openai import OpenAI
@@ -9,158 +9,137 @@ class ToolEnhancedAgent:
class ToolEnhancedAgent:
    """Question-answering agent backed by the OpenAI chat-completions API.

    The class also exposes three small helper tools (``calculator``, ``date``
    and ``wikipedia``) through :meth:`use_tool`.

    NOTE(review): nothing in this class calls ``use_tool``; ``__call__`` only
    mentions the tools in its prompt and returns the model's reply as-is.
    """

    def __init__(self):
        """Create the OpenAI client from the ``OPENAI_API_KEY`` environment variable.

        Raises:
            ValueError: If ``OPENAI_API_KEY`` is unset or empty.
        """
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise ValueError("OPENAI_API_KEY not found in environment variables.")
        self.client = OpenAI(api_key=api_key)
        print("ToolEnhancedAgent initialized.")

    @staticmethod
    def _evaluate_arithmetic(expression: str):
        """Evaluate a plain arithmetic expression without executing code.

        Only numeric literals, the binary operators ``+ - * / // % **`` and
        unary ``+``/``-`` are accepted.

        Raises:
            ValueError: If the expression contains anything else, or an
                exponent whose magnitude exceeds 1000.
            SyntaxError: If the text is not a valid Python expression.
        """
        import ast
        import operator

        binary_ops = {
            ast.Add: operator.add,
            ast.Sub: operator.sub,
            ast.Mult: operator.mul,
            ast.Div: operator.truediv,
            ast.FloorDiv: operator.floordiv,
            ast.Mod: operator.mod,
            ast.Pow: operator.pow,
        }
        unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}

        def evaluate(node):
            if isinstance(node, ast.Constant):
                value = node.value
                # bool is a subclass of int; keep True/False out of arithmetic.
                if isinstance(value, (int, float)) and not isinstance(value, bool):
                    return value
            elif isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
                left = evaluate(node.left)
                right = evaluate(node.right)
                # Guard against inputs such as 9**9**9 that would hang the process.
                if isinstance(node.op, ast.Pow) and abs(right) > 1000:
                    raise ValueError("exponent too large")
                return binary_ops[type(node.op)](left, right)
            elif isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
                return unary_ops[type(node.op)](evaluate(node.operand))
            raise ValueError("unsupported expression")

        return evaluate(ast.parse(expression.strip(), mode="eval").body)

    @staticmethod
    def _wikipedia_summary_url(query: str) -> str:
        """Build the Wikipedia REST summary URL for ``query``.

        The title is percent-encoded with no safe characters so that ``/``,
        ``?`` and ``#`` inside a title cannot alter the request path.
        """
        from urllib.parse import quote

        return "https://en.wikipedia.org/api/rest_v1/page/summary/" + quote(query, safe="")

    def use_tool(self, tool_name: str, input_text: str) -> str:
        """Run one of the built-in tools and return its result as text.

        Args:
            tool_name: ``"calculator"``, ``"date"`` or ``"wikipedia"``.
            input_text: Arithmetic expression for the calculator, page title
                for Wikipedia; ignored by the date tool.

        Returns:
            The tool output, ``"[Unknown tool]"`` for an unrecognised name, or
            ``"[Tool error: ...]"`` if the tool raised.
        """
        try:
            if tool_name == "calculator":
                # SECURITY: this used to be a raw eval(input_text), which runs
                # arbitrary code. It is deliberately replaced by a restricted
                # arithmetic evaluator; non-arithmetic input is now an error.
                return str(self._evaluate_arithmetic(input_text))
            elif tool_name == "date":
                return str(datetime.datetime.now().date())
            elif tool_name == "wikipedia":
                return self.search_wikipedia(input_text)
            else:
                return "[Unknown tool]"
        except Exception as e:
            return f"[Tool error: {e}]"

    def search_wikipedia(self, query: str) -> str:
        """Return the summary extract of the English Wikipedia page ``query``.

        Returns:
            The ``extract`` field of the response, ``"No summary found."`` when
            that field is missing, a "No Wikipedia summary" message for any
            non-200 status, or a "Wikipedia API error" message on exceptions.
        """
        try:
            # A timeout keeps a stalled connection from blocking the caller forever.
            res = requests.get(self._wikipedia_summary_url(query), timeout=10)
            if res.status_code == 200:
                return res.json().get("extract", "No summary found.")
            else:
                return f"No Wikipedia summary for '{query}'."
        except Exception as e:
            return f"Wikipedia API error: {e}"

    def __call__(self, question: str) -> str:
        """Ask the chat model to answer ``question`` and return its reply.

        Returns:
            The stripped reply text, or ``"[Agent error: ...]"`` if the API
            call or response handling raised.
        """
        # Chain-of-thought style prompt so the model reasons step by step.
        prompt = (
            "You are an assistant that solves problems step-by-step, and you can use tools like calculator, date, and wikipedia if needed.\n"
            f"Question: {question}\n"
            "Answer (think step-by-step and use tools if helpful):"
        )

        try:
            response = self.client.chat.completions.create(
                model="gpt-4o-mini",  # lighter model that is usually sufficient
                messages=[
                    {"role": "system", "content": "You are a helpful assistant that thinks step-by-step and can use tools."},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.2,
                max_tokens=700,
            )
            answer = response.choices[0].message.content.strip()
            print(f"Generated answer (preview): {answer[:100]}...")
            return answer
        except Exception as e:
            print(f"Agent error: {e}")
            return f"[Agent error: {e}]"
65
-
66
 
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Answer every fetched question with the agent and submit the results.

    Args:
        profile: OAuth profile of the logged-in Hugging Face user, or ``None``
            when nobody is logged in.

    Returns:
        A ``(status_message, results)`` pair. ``results`` is a pandas DataFrame
        with one row per attempted question, or ``None`` when the run stopped
        before any question was attempted.
    """
    if profile is None:
        return "Please login to Hugging Face to submit answers.", None

    api_url = "https://agents-course-unit4-scoring.hf.space"
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"
    username = profile.username

    try:
        agent = ToolEnhancedAgent()
    except Exception as init_error:
        return f"Agent initialization error: {init_error}", None

    try:
        fetched = requests.get(questions_url, timeout=15)
        fetched.raise_for_status()
        questions = fetched.json()
    except Exception as fetch_error:
        return f"Error fetching questions: {fetch_error}", None
    if not (isinstance(questions, list) and questions):
        return "Failed to fetch questions or empty list.", None

    def log_row(task_id, question_text, submitted):
        # One row of the results table shown to the user.
        return {
            "Task ID": task_id,
            "Question": question_text,
            "Submitted Answer": submitted,
        }

    payload_answers = []
    log_rows = []
    for entry in questions:
        task_id = entry.get("task_id")
        question_text = entry.get("question")
        # Entries lacking an id or a question are skipped without logging.
        if task_id and question_text is not None:
            try:
                reply = agent(question_text)
                payload_answers.append({"task_id": task_id, "submitted_answer": reply})
                log_rows.append(log_row(task_id, question_text, reply))
            except Exception as agent_error:
                log_rows.append(log_row(task_id, question_text, f"[Agent error: {agent_error}]"))

    if not payload_answers:
        return "Agent did not produce answers.", pd.DataFrame(log_rows)

    submission = {
        "username": username,
        "agent_code": f"https://huggingface.co/spaces/{os.getenv('SPACE_ID')}/tree/main",
        "answers": payload_answers,
    }

    try:
        posted = requests.post(submit_url, json=submission, timeout=60)
        posted.raise_for_status()
        result = posted.json()
        summary = (
            f"Submission Success!\n"
            f"User: {result.get('username')}\n"
            f"Score: {result.get('score', 'N/A')}%\n"
            f"Correct: {result.get('correct_count', '?')}/{result.get('total_attempted', '?')}\n"
            f"Message: {result.get('message', 'No message')}"
        )
        return summary, pd.DataFrame(log_rows)
    except Exception as submit_error:
        return f"Submission error: {submit_error}", pd.DataFrame(log_rows)
140
-
141
 
# Gradio UI: a login button, a run button, and two outputs for the result.
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Benchmark - Tool Enhanced Agent")

    gr.Markdown(
        """
        1. Login with your Hugging Face account.
        2. Click 'Run Evaluation & Submit All Answers' to run the agent on GAIA tasks.
        3. View your results and submission status.
        """
    )

    login_button = gr.LoginButton()
    run_button = gr.Button("Run Evaluation & Submit All Answers")
    status_box = gr.Textbox(label="Status / Submission Result", lines=7)
    results_table = gr.DataFrame(label="Questions and Agent Answers")

    # No `inputs` here: the callback's only parameter is annotated with
    # gr.OAuthProfile, which Gradio fills in itself. Passing the login button
    # as an input as well would hand the callback an extra positional argument.
    run_button.click(
        fn=run_and_submit_all,
        outputs=[status_box, results_table],
    )


if __name__ == "__main__":
    demo.launch()
 
1
  import os
 
2
  import requests
3
+ import datetime
4
  import pandas as pd
5
  import gradio as gr
6
  from openai import OpenAI
 
class ToolEnhancedAgent:
    """Question-answering agent backed by the OpenAI chat-completions API.

    The class also exposes three small helper tools (``calculator``, ``date``
    and ``wikipedia``) through :meth:`use_tool`.

    NOTE(review): nothing in this class calls ``use_tool``; ``__call__`` only
    mentions tools in its prompt and returns the model's reply as-is.
    """

    def __init__(self):
        """Create the OpenAI client from the ``OPENAI_API_KEY`` environment variable.

        Raises:
            ValueError: If ``OPENAI_API_KEY`` is unset or empty.
        """
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise ValueError("OPENAI_API_KEY is not set.")
        self.client = OpenAI(api_key=api_key)
        print("ToolEnhancedAgent initialized with GPT + CoT + Tools.")

    @staticmethod
    def _evaluate_arithmetic(expression: str):
        """Evaluate a plain arithmetic expression without executing code.

        Only numeric literals, the binary operators ``+ - * / // % **`` and
        unary ``+``/``-`` are accepted.

        Raises:
            ValueError: If the expression contains anything else, or an
                exponent whose magnitude exceeds 1000.
            SyntaxError: If the text is not a valid Python expression.
        """
        import ast
        import operator

        binary_ops = {
            ast.Add: operator.add,
            ast.Sub: operator.sub,
            ast.Mult: operator.mul,
            ast.Div: operator.truediv,
            ast.FloorDiv: operator.floordiv,
            ast.Mod: operator.mod,
            ast.Pow: operator.pow,
        }
        unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}

        def evaluate(node):
            if isinstance(node, ast.Constant):
                value = node.value
                # bool is a subclass of int; keep True/False out of arithmetic.
                if isinstance(value, (int, float)) and not isinstance(value, bool):
                    return value
            elif isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
                left = evaluate(node.left)
                right = evaluate(node.right)
                # Guard against inputs such as 9**9**9 that would hang the process.
                if isinstance(node.op, ast.Pow) and abs(right) > 1000:
                    raise ValueError("exponent too large")
                return binary_ops[type(node.op)](left, right)
            elif isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
                return unary_ops[type(node.op)](evaluate(node.operand))
            raise ValueError("unsupported expression")

        return evaluate(ast.parse(expression.strip(), mode="eval").body)

    @staticmethod
    def _wikipedia_summary_url(query: str) -> str:
        """Build the Wikipedia REST summary URL for ``query``.

        The title is percent-encoded with no safe characters so that ``/``,
        ``?`` and ``#`` inside a title cannot alter the request path.
        """
        from urllib.parse import quote

        return "https://en.wikipedia.org/api/rest_v1/page/summary/" + quote(query, safe="")

    def use_tool(self, tool_name: str, input_text: str) -> str:
        """Run one of the built-in tools and return its result as text.

        Args:
            tool_name: ``"calculator"``, ``"date"`` or ``"wikipedia"``.
            input_text: Arithmetic expression for the calculator, page title
                for Wikipedia; ignored by the date tool.

        Returns:
            The tool output, ``"[Tool Error: Unknown Tool]"`` for an
            unrecognised name, or ``"[Tool Error: ...]"`` if the tool raised.
        """
        try:
            if tool_name == "calculator":
                # SECURITY: this used to be a raw eval(input_text), which runs
                # arbitrary code. It is deliberately replaced by a restricted
                # arithmetic evaluator; non-arithmetic input is now an error.
                return str(self._evaluate_arithmetic(input_text))
            elif tool_name == "date":
                return str(datetime.datetime.now().date())
            elif tool_name == "wikipedia":
                return self.search_wikipedia(input_text)
            else:
                return "[Tool Error: Unknown Tool]"
        except Exception as e:
            return f"[Tool Error: {e}]"

    def search_wikipedia(self, query):
        """Return the summary extract of the English Wikipedia page ``query``.

        Returns:
            The ``extract`` field of the response, ``"No summary found."`` when
            that field is missing, a "No Wikipedia summary" message for any
            non-200 status, or a "Wiki error" message on exceptions.
        """
        try:
            # A timeout keeps a stalled connection from blocking the caller forever.
            res = requests.get(self._wikipedia_summary_url(query), timeout=10)
            if res.status_code == 200:
                return res.json().get("extract", "No summary found.")
            else:
                return f"No Wikipedia summary for {query}."
        except Exception as e:
            return f"Wiki error: {e}"

    def __call__(self, question: str) -> str:
        """Ask the chat model to answer ``question`` and return its reply.

        Returns:
            The stripped reply text, or ``"[Agent Error: ...]"`` if the API
            call or response handling raised.
        """
        prompt = (
            "You are a helpful AI assistant. Use tools when necessary. "
            "Think step-by-step before answering. Respond clearly.\n\n"
            f"Question: {question}\n"
            "Answer (show thinking steps):"
        )

        try:
            response = self.client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": "You are a smart assistant that can use tools and think step-by-step."},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.3,
                max_tokens=700,
            )
            answer = response.choices[0].message.content.strip()
            print(f"Answer generated: {answer[:100]}...")
            return answer
        except Exception as e:
            print(f"[Agent Error]: {e}")
            return f"[Agent Error: {e}]"
 
63
 
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Answer every fetched question with the agent and submit the results.

    The ``gr.OAuthProfile | None`` annotation is required: Gradio uses it to
    inject the logged-in user's profile. Without it the callback receives
    whatever component value is wired as an input, not a profile.

    Args:
        profile: OAuth profile of the logged-in Hugging Face user, or ``None``
            when nobody is logged in.

    Returns:
        A ``(status_message, results)`` pair. ``results`` is a pandas DataFrame
        with one row per attempted question, or ``None`` when the run stopped
        before any question was attempted.
    """
    if not profile:
        return "Please login to Hugging Face first.", None

    username = profile.username
    api_url = "https://agents-course-unit4-scoring.hf.space"
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    # Instantiate agent
    try:
        agent = ToolEnhancedAgent()
    except Exception as e:
        return f"Agent initialization error: {e}", None

    # Fetch questions
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None
    # The loop below calls .get() on every entry, so a non-list payload
    # (for example a JSON error object) must be rejected here.
    if not isinstance(questions, list) or not questions:
        return "No questions fetched.", None

    answers_payload = []
    results_log = []

    for q in questions:
        if not isinstance(q, dict):
            # Malformed entry: skip it instead of crashing the whole run.
            continue
        task_id = q.get("task_id")
        question = q.get("question")
        if not task_id or question is None:
            continue
        try:
            answer = agent(question)
            answers_payload.append({"task_id": task_id, "submitted_answer": answer})
            results_log.append({"Task ID": task_id, "Question": question, "Submitted Answer": answer})
        except Exception as e:
            results_log.append({"Task ID": task_id, "Question": question, "Submitted Answer": f"Error: {e}"})

    if not answers_payload:
        return "No answers generated.", pd.DataFrame(results_log)

    submission_data = {
        "username": username,
        # NOTE(review): SPACE_ID is read without a default, so this URL
        # contains "None" when the variable is unset.
        "agent_code": f"https://huggingface.co/spaces/{os.getenv('SPACE_ID')}/tree/main",
        "answers": answers_payload,
    }

    try:
        resp = requests.post(submit_url, json=submission_data, timeout=60)
        resp.raise_for_status()
        result = resp.json()
        status = (
            f"Submission Successful!\n"
            f"User: {result.get('username')}\n"
            f"Score: {result.get('score', 'N/A')}%\n"
            f"Correct: {result.get('correct_count', '?')}/{result.get('total_attempted', '?')}\n"
            f"Message: {result.get('message', '')}"
        )
        results_df = pd.DataFrame(results_log)
        return status, results_df
    except Exception as e:
        results_df = pd.DataFrame(results_log)
        return f"Submission failed: {e}", results_df


# Gradio UI: a login button, a run button, and two outputs for the result.
with gr.Blocks() as demo:
    gr.Markdown("# ToolEnhancedAgent for GAIA Benchmark")
    login_btn = gr.LoginButton()
    run_btn = gr.Button("Run Evaluation & Submit All Answers")

    status_output = gr.Textbox(label="Status / Result", lines=6, interactive=False)
    results_table = gr.DataFrame(headers=["Task ID", "Question", "Submitted Answer"], label="Agent Answers", wrap=True)

    # No `inputs` here: the profile argument is injected by Gradio from the
    # callback's gr.OAuthProfile annotation, not read from the login button.
    run_btn.click(
        fn=run_and_submit_all,
        outputs=[status_output, results_table],
    )


if __name__ == "__main__":
    demo.launch()