Final_Assignment_Template

Sleeping

test3

2707bf9 verified 5 months ago

6.6 kB

	import os
	import gradio as gr
	import requests
	import pandas as pd
	from openai import OpenAI

	# Constants
	# Base URL of the scoring service; run_and_submit_all derives the
	# /questions and /submit endpoints from it.
	DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

	# ToolEnhancedAgent uses the current OpenAI Python SDK (1.x).
	class ToolEnhancedAgent:
	    """Question-answering agent backed by an OpenAI chat model plus simple tools.

	    The model (``gpt-4o-mini``) is asked to reason step by step and may request
	    a tool by writing a tag such as ``[calculator] 2+2`` in its answer. The
	    first recognised tag is executed and its result is appended to the answer.
	    """

	    # Tool tags recognised in model output; keep in sync with use_tool().
	    _TOOL_NAMES = ("calculator", "date", "wikipedia")

	    def __init__(self):
	        """Create the OpenAI client from the ``OPENAI_API_KEY`` environment variable.

	        Raises:
	            ValueError: If ``OPENAI_API_KEY`` is unset or empty.
	        """
	        api_key = os.getenv("OPENAI_API_KEY")
	        if not api_key:
	            raise ValueError("OPENAI_API_KEY not found in environment variables.")
	        self.client = OpenAI(api_key=api_key)
	        print("ToolEnhancedAgent initialized with OpenAI GPT model.")

	    def use_tool(self, tool_name: str, input_text: str) -> str:
	        """Run one of the built-in tools and return its result as text.

	        Args:
	            tool_name: ``"calculator"``, ``"date"`` or ``"wikipedia"``.
	            input_text: Expression to evaluate, or the Wikipedia page title.
	                Ignored by the ``date`` tool.

	        Returns:
	            The tool output, or a ``"[Tool Error: ...]"`` string when the tool
	            is unknown or fails. Exceptions are reported, never propagated.
	        """
	        try:
	            if tool_name == "calculator":
	                import math

	                # SECURITY: input_text comes from model output, i.e. it is
	                # untrusted. Blanking __builtins__ blocks the obvious names
	                # but is NOT a sandbox (attribute chains can still reach
	                # arbitrary objects). Replace with an AST-based arithmetic
	                # evaluator before exposing this beyond a demo.
	                return str(eval(input_text, {"__builtins__": None, "math": math}))
	            elif tool_name == "date":
	                import datetime

	                return str(datetime.datetime.now().date())
	            elif tool_name == "wikipedia":
	                return self.search_wikipedia(input_text)
	            else:
	                return "[Tool Error: Unknown tool]"
	        except Exception as e:
	            return f"[Tool Error: {e}]"

	    def search_wikipedia(self, query: str) -> str:
	        """Return the summary extract of the English Wikipedia page *query*.

	        Args:
	            query: Page title to look up.

	        Returns:
	            The page extract, ``"No summary found."`` when the response has no
	            extract, ``"No Wikipedia summary for <query>."`` when the lookup
	            does not return HTTP 200, or ``"Wikipedia Error: ..."`` on failure.
	        """
	        try:
	            from urllib.parse import quote

	            # Percent-encode the title: spaces, "/", "?" and "#" would
	            # otherwise change the URL's path or query instead of naming a page.
	            title = quote(query.strip(), safe="")
	            if not title:
	                return f"No Wikipedia summary for {query}."
	            # A timeout keeps one stalled request from hanging the whole run.
	            res = requests.get(
	                f"https://en.wikipedia.org/api/rest_v1/page/summary/{title}",
	                timeout=15,
	            )
	            if res.status_code == 200:
	                return res.json().get("extract", "No summary found.")
	            return f"No Wikipedia summary for {query}."
	        except Exception as e:
	            return f"Wikipedia Error: {e}"

	    def __call__(self, question: str) -> str:
	        """Answer *question* with the chat model, running at most one tool call.

	        Args:
	            question: The question text to send to the model.

	        Returns:
	            The model's answer, followed by a ``[Tool used: ...]`` section when
	            the answer contained a recognised tool tag. On any failure an
	            ``"[Agent Error: ...]"`` string is returned instead of raising.
	        """
	        import re

	        # Chain-of-thought prompt that also tells the model how to request a tool.
	        prompt = (
	            "You are an AI assistant that can think step-by-step and use tools when needed.\n"
	            f"Question: {question}\n"
	            "Answer with your reasoning steps. If needed, mention the tool you want to use like [calculator], [date], [wikipedia]."
	        )

	        try:
	            response = self.client.chat.completions.create(
	                model="gpt-4o-mini",
	                messages=[
	                    {"role": "system", "content": "You are a helpful assistant using tools and reasoning."},
	                    {"role": "user", "content": prompt},
	                ],
	                temperature=0.3,
	                max_tokens=700,
	            )
	            # The message content can be absent; treat that as an empty
	            # answer rather than failing on None.strip().
	            answer = (response.choices[0].message.content or "").strip()

	            # Simple tool simulation: if the answer contains a known tool tag,
	            # run that tool on the rest of the line and append the result,
	            # e.g. "[calculator] 2+2" -> "Result: 4". Only known tool names
	            # match, so ordinary bracketed words such as "[Note]" are ignored.
	            tool_pattern = r"\[(" + "|".join(self._TOOL_NAMES) + r")\](.*)"
	            match = re.search(tool_pattern, answer, re.IGNORECASE)
	            if match:
	                tool_name = match.group(1).lower()
	                tool_input = match.group(2).strip()
	                tool_result = self.use_tool(tool_name, tool_input)
	                answer += f"\n\n[Tool used: {tool_name}]\nResult: {tool_result}"
	            return answer
	        except Exception as e:
	            print(f"Agent error: {e}")
	            return f"[Agent Error: {e}]"

	# run_and_submit_all takes the OAuth profile of the logged-in user.
	def run_and_submit_all(profile: gr.OAuthProfile | None):
	    """Fetch all questions, answer them with the agent, and submit the answers.

	    Args:
	        profile: OAuth profile of the signed-in Hugging Face user, or ``None``
	            when nobody is logged in.

	    Returns:
	        A ``(status, results)`` tuple. ``status`` is a human-readable message;
	        ``results`` is a ``pandas.DataFrame`` with one row per processed
	        question, or ``None`` when the run stopped before any question was
	        processed. Failures are reported through ``status``, never raised.
	    """
	    if profile is None:
	        return "Please login with your Hugging Face account.", None

	    username = profile.username
	    # Placeholder fallback; replace with your own space id if SPACE_ID is unset.
	    space_id = os.getenv("SPACE_ID") or "your-username/your-space"

	    api_url = DEFAULT_API_URL
	    questions_url = f"{api_url}/questions"
	    submit_url = f"{api_url}/submit"

	    try:
	        agent = ToolEnhancedAgent()
	    except Exception as e:
	        return f"Error initializing agent: {e}", None

	    agent_code_url = f"https://huggingface.co/spaces/{space_id}/tree/main"

	    # Fetch the questions.
	    try:
	        response = requests.get(questions_url, timeout=15)
	        response.raise_for_status()
	        questions_data = response.json()
	    except Exception as e:
	        return f"Error fetching questions: {e}", None

	    # Anything other than a non-empty list cannot be iterated as questions;
	    # report that directly instead of failing later on item.get().
	    if not isinstance(questions_data, list) or not questions_data:
	        return "Fetched questions list is empty or invalid format.", None

	    answers_payload = []
	    results_log = []

	    for item in questions_data:
	        if not isinstance(item, dict):
	            continue
	        task_id = item.get("task_id")
	        question_text = item.get("question")
	        if not task_id or question_text is None:
	            continue
	        try:
	            answer = agent(question_text)
	            answers_payload.append({"task_id": task_id, "submitted_answer": answer})
	            results_log.append({
	                "Task ID": task_id,
	                "Question": question_text,
	                "Submitted Answer": answer,
	            })
	        except Exception as e:
	            # Log the failure for display; failed questions are not submitted.
	            results_log.append({
	                "Task ID": task_id,
	                "Question": question_text,
	                "Submitted Answer": f"Agent Error: {e}",
	            })

	    if not answers_payload:
	        return "Agent did not produce answers to submit.", pd.DataFrame(results_log)

	    submission_data = {
	        "username": username.strip(),
	        "agent_code": agent_code_url,
	        "answers": answers_payload,
	    }

	    try:
	        submit_response = requests.post(submit_url, json=submission_data, timeout=60)
	        submit_response.raise_for_status()
	        result = submit_response.json()

	        status = (
	            f"Submission Successful!\n"
	            f"User: {result.get('username')}\n"
	            f"Score: {result.get('score', 'N/A')}% "
	            f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')} correct)\n"
	            f"Message: {result.get('message', 'No message')}"
	        )
	        return status, pd.DataFrame(results_log)
	    except Exception as e:
	        return f"Submission failed: {e}", pd.DataFrame(results_log)

	# Gradio UI: login button, run button, and the two outputs filled by
	# run_and_submit_all (status text and the per-question results table).
	with gr.Blocks() as demo:
	    gr.Markdown("# GAIA Benchmark Agent Runner")
	    gr.Markdown("""
	1. Login with your Hugging Face account.
	2. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, and submit answers.
	""")

	    login_btn = gr.LoginButton()
	    run_btn = gr.Button("Run Evaluation & Submit All Answers")

	    status_out = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
	    results_df = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

	    # No `inputs` here: Gradio injects the OAuth profile into the handler
	    # based on its `gr.OAuthProfile | None` type hint. Listing the login
	    # button as an input would pass an extra positional argument to a
	    # function that accepts only `profile`.
	    run_btn.click(
	        fn=run_and_submit_all,
	        outputs=[status_out, results_df],
	    )

	# Start the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch(debug=True, share=False)