Final_Assignment_Template

Sleeping

File size: 6,598 Bytes

10e9b7d
2707bf9
fffffb0
2794b4c
 
904e0bd
2707bf9
 
 
 
fffffb0
1803c5e
 
 
2707bf9
1803c5e
2707bf9
fffffb0
 
2707bf9
fffffb0
 
2707bf9
 
 
fffffb0
2707bf9
fffffb0
 
2794b4c
fffffb0
2707bf9
fffffb0
393a8af
fffffb0
2707bf9
fffffb0
393a8af
fffffb0
 
2707bf9
fffffb0
2707bf9
1803c5e
 
2707bf9
fffffb0
2707bf9
fffffb0
2707bf9
fffffb0
3628aaf
1803c5e
 
393a8af
1803c5e
2707bf9
fffffb0
1803c5e
393a8af
fffffb0
1803c5e
 
2707bf9
 
 
 
 
 
 
 
 
 
1803c5e
 
2707bf9
393a8af
177be6f
2707bf9
 
 
 
177be6f
d8dafef
2707bf9
 
 
d8dafef
 
177be6f
 
 
 
2707bf9
 
 
177be6f
2707bf9
177be6f
 
 
2707bf9
177be6f
d8dafef
177be6f
393a8af
177be6f
 
2707bf9
 
 
 
177be6f
 
2707bf9
393a8af
2707bf9
 
 
 
 
177be6f
2707bf9
 
 
 
 
d8dafef
393a8af
2707bf9
177be6f
393a8af
2707bf9
 
393a8af
177be6f
 
 
2707bf9
 
 
 
393a8af
 
3628aaf
2707bf9
 
 
177be6f
2707bf9
d8dafef
2707bf9
177be6f
2707bf9
177be6f
2707bf9
 
 
 
 
 
393a8af
 
177be6f
2707bf9
 
3628aaf
393a8af
3628aaf
393a8af
2707bf9
3628aaf
177be6f
 
2707bf9

import os
import gradio as gr
import requests
import pandas as pd
from openai import OpenAI

# Constants
# Base URL of the scoring service; the "/questions" and "/submit" endpoint
# paths are appended to it when a run is performed.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# ToolEnhancedAgent uses the current OpenAI Python client API (1.x).
class ToolEnhancedAgent:
    """Chat-completion agent that can augment its answer with simple tools.

    The model is asked to reason step by step and to mention a tool tag such
    as ``[calculator]``, ``[date]`` or ``[wikipedia]``. When the answer
    contains one of those tags, the rest of that line is used as the tool
    input and the tool result is appended to the answer.
    """

    # Tool tags recognised in model output, e.g. "[calculator] 2+2".
    TOOL_NAMES = ("calculator", "date", "wikipedia")

    def __init__(self):
        """Create the OpenAI client from the ``OPENAI_API_KEY`` variable.

        Raises:
            ValueError: If ``OPENAI_API_KEY`` is unset or empty.
        """
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise ValueError("OPENAI_API_KEY not found in environment variables.")
        self.client = OpenAI(api_key=api_key)
        print("ToolEnhancedAgent initialized with OpenAI GPT model.")

    @staticmethod
    def _safe_calculate(expression: str):
        """Evaluate an arithmetic expression without calling ``eval``.

        The expression comes from model output, so it is treated as untrusted.
        Only int/float literals, ``+ - * / // % **``, unary ``+``/``-``,
        public ``math.<name>`` constants and positional calls to public
        ``math.<name>(...)`` functions are accepted. The cost of the
        computation itself (e.g. very large exponents) is not bounded.

        Raises:
            SyntaxError: If the text is not a valid Python expression.
            ValueError: If the expression uses an unsupported construct.
        """
        import ast
        import math
        import operator

        binary_ops = {
            ast.Add: operator.add,
            ast.Sub: operator.sub,
            ast.Mult: operator.mul,
            ast.Div: operator.truediv,
            ast.FloorDiv: operator.floordiv,
            ast.Mod: operator.mod,
            ast.Pow: operator.pow,
        }
        unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}

        def math_member(node):
            # Only direct, public attributes of the ``math`` module are
            # reachable; this blocks dunder traversal such as ``x.__class__``.
            if (
                isinstance(node, ast.Attribute)
                and isinstance(node.value, ast.Name)
                and node.value.id == "math"
                and not node.attr.startswith("_")
            ):
                return getattr(math, node.attr)
            raise ValueError("unsupported expression")

        def evaluate(node):
            if isinstance(node, ast.Constant):
                # Exact type check: excludes bool, str, bytes, complex, None.
                if type(node.value) in (int, float):
                    return node.value
            elif isinstance(node, ast.BinOp):
                op = binary_ops.get(type(node.op))
                if op is not None:
                    return op(evaluate(node.left), evaluate(node.right))
            elif isinstance(node, ast.UnaryOp):
                op = unary_ops.get(type(node.op))
                if op is not None:
                    return op(evaluate(node.operand))
            elif isinstance(node, ast.Call):
                func = math_member(node.func)
                if callable(func) and not node.keywords:
                    return func(*[evaluate(arg) for arg in node.args])
            elif isinstance(node, ast.Attribute):
                value = math_member(node)
                if not callable(value):
                    return value
            raise ValueError("unsupported expression")

        # compile()/ast.parse reject leading whitespace in "eval" mode.
        tree = ast.parse(expression.strip(), mode="eval")
        return evaluate(tree.body)

    def use_tool(self, tool_name: str, input_text: str) -> str:
        """Run the named tool on ``input_text`` and return its result as text.

        Supported tools: ``calculator``, ``date`` and ``wikipedia``. This
        method does not raise for tool failures; they are reported as a
        ``"[Tool Error: ...]"`` string instead.
        """
        try:
            if tool_name == "calculator":
                # NOTE(security): this used to be eval() with __builtins__
                # set to None, which is not a sandbox. The input is model
                # output, so it is evaluated through a whitelisting AST walk.
                return str(self._safe_calculate(input_text))
            elif tool_name == "date":
                import datetime
                return str(datetime.date.today())
            elif tool_name == "wikipedia":
                return self.search_wikipedia(input_text)
            else:
                return "[Tool Error: Unknown tool]"
        except Exception as e:
            return f"[Tool Error: {e}]"

    @staticmethod
    def _wikipedia_summary_url(query: str) -> str:
        """Build the Wikipedia REST summary URL for ``query``.

        The query is percent-encoded as a single path segment so that spaces,
        ``/``, ``?`` and ``#`` cannot change the request path.
        """
        from urllib.parse import quote

        title = quote(query.strip(), safe="")
        return f"https://en.wikipedia.org/api/rest_v1/page/summary/{title}"

    def search_wikipedia(self, query: str) -> str:
        """Return the Wikipedia summary extract for ``query``.

        Returns a "No Wikipedia summary ..." message on a non-200 response
        and a "Wikipedia Error: ..." message when the request itself fails.
        """
        try:
            # A timeout keeps one stalled request from blocking the whole run.
            res = requests.get(self._wikipedia_summary_url(query), timeout=10)
            if res.status_code == 200:
                return res.json().get("extract", "No summary found.")
            return f"No Wikipedia summary for {query}."
        except Exception as e:
            return f"Wikipedia Error: {e}"

    def _apply_tool(self, answer: str) -> str:
        """Append a tool result to ``answer`` if it contains a known tool tag.

        Simple tool simulation: the first ``[toolname]`` tag naming a known
        tool is used, and the rest of that line is the tool input. For
        example "[calculator] 2+2" computes 4 and appends it to the answer.
        Other bracketed text (markdown links, citations) is ignored.
        """
        import re

        names = "|".join(self.TOOL_NAMES)
        match = re.search(rf"\[({names})\](.*)", answer, re.IGNORECASE)
        if match is None:
            return answer
        tool_name = match.group(1).lower()
        tool_input = match.group(2).strip()
        tool_result = self.use_tool(tool_name, tool_input)
        return answer + f"\n\n[Tool used: {tool_name}]\nResult: {tool_result}"

    def __call__(self, question: str) -> str:
        """Answer ``question`` with the chat model, applying a tool if requested.

        Does not raise for API failures; they are printed and returned as an
        ``"[Agent Error: ...]"`` string.
        """
        # Chain-of-thought prompt with instructions on how to request a tool.
        prompt = (
            "You are an AI assistant that can think step-by-step and use tools when needed.\n"
            f"Question: {question}\n"
            "Answer with your reasoning steps. If needed, mention the tool you want to use like [calculator], [date], [wikipedia]."
        )

        try:
            response = self.client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": "You are a helpful assistant using tools and reasoning."},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.3,
                max_tokens=700,
            )
            # The message content may be None; treat that as an empty answer.
            answer = (response.choices[0].message.content or "").strip()
            return self._apply_tool(answer)
        except Exception as e:
            print(f"Agent error: {e}")
            return f"[Agent Error: {e}]"

# run_and_submit_all takes the Hugging Face profile of the logged-in user
# (None when nobody is logged in).
def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Fetch the questions, answer each one with the agent, and submit them.

    Args:
        profile: Profile of the logged-in user, or None when not logged in.

    Returns:
        A ``(status_message, results)`` tuple. ``results`` is a DataFrame
        with one row per processed question (task id, question, submitted
        answer), or None when the run stops before any question is processed.
    """
    if profile is None:
        return "Please login with your Hugging Face account.", None

    username = profile.username
    # Placeholder fallback: replace with your own space id if SPACE_ID is unset.
    space_id = os.getenv("SPACE_ID") or "your-username/your-space"

    api_url = DEFAULT_API_URL
    questions_url = f"{api_url}/questions"
    submit_url = f"{api_url}/submit"

    try:
        agent = ToolEnhancedAgent()
    except Exception as e:
        return f"Error initializing agent: {e}", None

    agent_code_url = f"https://huggingface.co/spaces/{space_id}/tree/main"

    # Fetch the questions.
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()
    except Exception as e:
        return f"Error fetching questions: {e}", None

    # The loop below relies on a list of dicts; anything else would raise an
    # unhandled AttributeError, so reject it with a readable status instead.
    if not isinstance(questions_data, list):
        return "Error fetching questions: unexpected response format.", None

    answers_payload = []
    results_log = []

    for item in questions_data:
        if not isinstance(item, dict):
            continue
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            continue
        try:
            answer = agent(question_text)
            answers_payload.append({"task_id": task_id, "submitted_answer": answer})
            results_log.append({
                "Task ID": task_id,
                "Question": question_text,
                "Submitted Answer": answer,
            })
        except Exception as e:
            # Failed questions are logged for display but not submitted.
            results_log.append({
                "Task ID": task_id,
                "Question": question_text,
                "Submitted Answer": f"Agent Error: {e}",
            })

    if not answers_payload:
        return "Agent did not produce answers to submit.", pd.DataFrame(results_log)

    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code_url,
        "answers": answers_payload,
    }

    try:
        submit_response = requests.post(submit_url, json=submission_data, timeout=60)
        submit_response.raise_for_status()
        result = submit_response.json()

        status = (
            f"Submission Successful!\n"
            f"User: {result.get('username')}\n"
            f"Score: {result.get('score', 'N/A')}% "
            f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')} correct)\n"
            f"Message: {result.get('message', 'No message')}"
        )
        return status, pd.DataFrame(results_log)
    except Exception as e:
        return f"Submission failed: {e}", pd.DataFrame(results_log)

# Gradio UI: login button, run button, and the status/result outputs.
with gr.Blocks() as demo:
    gr.Markdown("# GAIA Benchmark Agent Runner")
    gr.Markdown("""
    1. Login with your Hugging Face account.
    2. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, and submit answers.
    """)

    login_btn = gr.LoginButton()
    run_btn = gr.Button("Run Evaluation & Submit All Answers")

    status_out = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
    results_df = gr.DataFrame(label="Questions and Agent Answers", wrap=True)

    # No `inputs`: Gradio injects the value for a callback parameter annotated
    # with gr.OAuthProfile on its own. Listing the login button as an input
    # would pass the callback an extra positional argument that its
    # one-parameter signature does not accept.
    run_btn.click(
        fn=run_and_submit_all,
        outputs=[status_out, results_df]
    )

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(debug=True, share=False)