File size: 9,252 Bytes
10e9b7d
d5eb6ce
10e9b7d
eccf8e4
7d65c66
3c4371f
8665f65
 
 
3c85cbc
208de35
8665f65
f1bf21c
10e9b7d
e80aab9
3db6293
e80aab9
31243f4
1a07a92
 
e7d36f3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31243f4
8665f65
dfbde74
0f4fca1
8665f65
6b8c9d9
dfbde74
8665f65
eaad269
 
 
d5eb6ce
 
 
4021bf3
0f4fca1
1a07a92
 
 
 
0f4fca1
3c4371f
1a07a92
 
0f4fca1
3c4371f
7e4a06b
0f4fca1
7d65c66
3c4371f
1a07a92
7e4a06b
31243f4
 
e80aab9
1a07a92
31243f4
 
 
3c4371f
31243f4
0f4fca1
1a07a92
36ed51a
c1fd3d2
3c4371f
1a07a92
31243f4
eccf8e4
31243f4
7d65c66
1a07a92
31243f4
1a07a92
 
31243f4
0f4fca1
 
31243f4
0f4fca1
31243f4
 
e80aab9
1a07a92
 
 
 
 
 
3c4371f
1a07a92
 
31243f4
 
 
 
 
 
 
1a07a92
0f4fca1
7d65c66
 
31243f4
0f4fca1
 
31243f4
 
3c4371f
31243f4
 
1a07a92
7d65c66
31243f4
1a07a92
 
e80aab9
7d65c66
e80aab9
 
31243f4
e80aab9
 
3c4371f
 
 
e80aab9
 
31243f4
 
7d65c66
0f4fca1
31243f4
 
 
e80aab9
1a07a92
e80aab9
1a07a92
 
31243f4
0ee0419
e514fd7
 
0f4fca1
 
e514fd7
e80aab9
1a07a92
7e4a06b
e80aab9
1a07a92
31243f4
1a07a92
 
 
31243f4
0f4fca1
31243f4
e80aab9
 
 
3c4371f
0f4fca1
 
7d65c66
3c4371f
 
7d65c66
3c4371f
7d65c66
 
0f4fca1
7d65c66
 
 
 
 
 
5bfd15c
 
 
 
 
 
 
 
627d094
5bfd15c
 
 
 
 
 
 
 
f1bf21c
 
0ae8b9f
 
 
 
 
 
 
f1bf21c
0ae8b9f
f1bf21c
5bfd15c
3c4371f
31243f4
0f4fca1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
import os
import re
import gradio as gr
import requests
import inspect
import pandas as pd
# from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
# from llama_index.core.agent.workflow import AgentWorkflow
# from llama_index.core.tools import FunctionTool
from agent_llama import all_tools
from agent_graph import build_graph
from langchain_core.messages import HumanMessage
from langchain_google_genai import ChatGoogleGenerativeAI

# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# --- Basic Agent Definition ---

# A custom agent class that wraps an LLM and agent workflow from llama index
# class BasicAgent:
#     def __init__(self):
#         print("BasicAgent initialized.")
#         self.llm = HuggingFaceInferenceAPI(model_name="HuggingFaceH4/zephyr-7b-beta")

#         self.agent = AgentWorkflow.from_tools_or_functions(
#             all_tools,  # make sure all_tools are sync functions
#             llm=self.llm,
#             system_prompt="You are a general AI assistant. Think step-by-step, and return only the final answer on the last line."
#         )

#     def __call__(self, question: str) -> str:
#         try:
#             response = self.agent.run(question)  # sync version of arun()
#             return str(response)
#         except Exception as e:
#             return f"Agent error: {e}"

# Using LangGraph
class BasicAgent:
    """Callable agent that answers a question by running a LangGraph graph.

    The graph is built once with ``build_graph()`` when the agent is created
    and is reused for every question passed to ``__call__``.
    """

    # Captures the remainder of the line following the "FINAL ANSWER:" marker;
    # whitespace right after the marker (including a line break) is skipped.
    _FINAL_ANSWER_RE = re.compile(r"FINAL ANSWER:\s*(.*)")

    def __init__(self):
        print("BasicAgent initialized.")
        self.graph = build_graph()

    @staticmethod
    def _content_to_text(content) -> str:
        """Flatten message content into a single string.

        Plain strings are returned unchanged. For a list, string items and the
        ``"text"`` value of dict items are concatenated and every other item
        is ignored. ``None`` becomes an empty string; anything else goes
        through ``str()``.
        """
        if isinstance(content, str):
            return content
        if isinstance(content, list):
            # NOTE(review): LangChain message content is documented as either
            # a string or a list of string/dict blocks - confirm the block
            # shape the configured model actually returns.
            pieces = []
            for part in content:
                if isinstance(part, str):
                    pieces.append(part)
                elif isinstance(part, dict) and isinstance(part.get("text"), str):
                    pieces.append(part["text"])
            return "".join(pieces)
        return "" if content is None else str(content)

    @staticmethod
    def _extract_final_answer(content) -> str:
        """Return the text after ``FINAL ANSWER:``, or the whole text if the marker is absent."""
        text = BasicAgent._content_to_text(content)
        match = BasicAgent._FINAL_ANSWER_RE.search(text)
        return match.group(1).strip() if match else text

    def __call__(self, question: str) -> str:
        """Run the graph on ``question`` and return the extracted final answer.

        Args:
            question: The question text, sent to the graph as a single
                ``HumanMessage``.

        Returns:
            The text following ``FINAL ANSWER:`` in the last message of the
            graph result, stripped of surrounding whitespace; the full text of
            that message when the marker is not present.
        """
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        user_message = [HumanMessage(content=question)]
        result = self.graph.invoke({"messages": user_message})
        # Only the last message of the run is treated as the answer.
        return self._extract_final_answer(result['messages'][-1].content)

def run_and_submit_all(profile: gr.OAuthProfile | None):
    """Run the agent on every fetched question and submit the answers.

    Args:
        profile: OAuth profile of the logged-in user, or ``None`` when nobody
            is logged in.

    Returns:
        A ``(status, results)`` tuple. ``status`` is a human-readable message.
        ``results`` is a DataFrame with one row per attempted task (task id,
        question, submitted answer or agent error), or ``None`` when the run
        stops before any question is attempted (no login, agent
        initialization failure, or question fetch failure).
    """
    # Guard: nothing can be submitted without a logged-in user.
    if not profile:
        print("User not logged in.")
        return "Please Login to Hugging Face with the button.", None

    username = f"{profile.username}"
    print(f"User logged in: {username}")

    # Endpoints of the scoring service.
    base_url = DEFAULT_API_URL
    questions_url = f"{base_url}/questions"
    submit_url = f"{base_url}/submit"

    # Build the agent; report the failure to the UI instead of raising.
    try:
        agent = BasicAgent()
    except Exception as exc:
        print(f"Error instantiating agent: {exc}")
        return f"Error initializing agent: {exc}", None

    # Link to this Space's repository, sent along with the answers.
    space_id = os.getenv("SPACE_ID")
    agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
    print(agent_code)

    # Download the question set.
    print(f"Fetching questions from: {questions_url}")
    try:
        response = requests.get(questions_url, timeout=15)
        response.raise_for_status()
        questions_data = response.json()

        # An empty (falsy) payload means there is nothing to evaluate.
        if not questions_data:
            print("Fetched questions list is empty.")
            return "Fetched questions list is empty or invalid format.", None
        print(f"Fetched {len(questions_data)} questions.")
    except Exception as exc:
        print(f"Error fetching questions: {exc}")
        return f"Error fetching questions: {exc}", None

    answers_payload = []  # task id + answer: the part that gets submitted
    results_log = []  # task id + question + answer (or error): shown in the UI

    print(f"Running agent on {len(questions_data)} questions...")

    for entry in questions_data:
        task_id = entry.get("task_id")
        question_text = entry.get("question")
        if question_text is None or not task_id:
            print(f"Skipping item with missing task_id or question: {entry}")
            continue

        try:
            submitted_answer = agent(question_text)
        except Exception as exc:
            # A failed task is shown in the table but never submitted.
            print(f"Error running agent on task {task_id}: {exc}")
            displayed_answer = f"AGENT ERROR: {exc}"
        else:
            answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
            displayed_answer = submitted_answer

        results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": displayed_answer})

    if not answers_payload:
        print("Agent did not produce any answers to submit.")
        return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)

    submission_data = {
        "username": username.strip(),
        "agent_code": agent_code,
        "answers": answers_payload,
    }
    print(f"Submitting {len(answers_payload)} answers to: {submit_url}")

    # Post the answers and turn the scoring response into a status message.
    try:
        response = requests.post(submit_url, json=submission_data, timeout=60)
        response.raise_for_status()
        result_data = response.json()
        final_status = (
            f"Submission Successful!\n"
            f"User: {result_data.get('username')}\n"
            f"Overall Score: {result_data.get('score', 'N/A')}% "
            f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
            f"Message: {result_data.get('message', 'No message received.')}"
        )
        print("Submission successful.")
        return final_status, pd.DataFrame(results_log)
    except Exception as exc:
        status_message = f"Submission Failed: {exc}"
        print(status_message)
        return status_message, pd.DataFrame(results_log)

# Initialize Gradio app
with gr.Blocks() as demo:
    # Page heading followed by the usage instructions.
    gr.Markdown(value="# Basic Agent Evaluation Runner")
    gr.Markdown(
        value="""
        **Instructions:**
        1. Log in to your Hugging Face account using the button below.
        2. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, and see the score.
        """
    )

    # Login button for Hugging Face authentication.
    gr.LoginButton()

    # Button that starts the evaluation, plus the two components it fills in.
    run_button = gr.Button(value="Run Evaluation & Submit All Answers")
    # Read-only textbox showing the run status / submission result.
    status_output = gr.Textbox(
        label="Run Status / Submission Result",
        lines=5,
        interactive=False,
    )
    # Table listing each question with the agent's answer.
    results_table = gr.DataFrame(
        label="Questions and Agent Answers",
        wrap=True,
    )

    # Clicking the button runs the evaluation and writes to both outputs.
    run_button.click(run_and_submit_all, outputs=[status_output, results_table])

if __name__ == "__main__":
    # Script entry point: print environment diagnostics, run best-effort
    # dependency checks (failures are reported, never fatal), then launch
    # the Gradio interface.
    print("\n" + "-"*30 + " App Starting " + "-"*30)
    space_host_startup = os.getenv("SPACE_HOST")
    space_id_startup = os.getenv("SPACE_ID")

    if space_host_startup:
        print(f"✅ SPACE_HOST found: {space_host_startup}")
        # NOTE(review): Hugging Face documents SPACE_HOST as the full host name
        # (already ending in ".hf.space") - confirm. Stripping the suffix before
        # re-adding it keeps the printed URL valid for either form.
        runtime_host = space_host_startup.removesuffix(".hf.space")
        print(f"   Runtime URL should be: https://{runtime_host}.hf.space")
    else:
        print("ℹ️  SPACE_HOST environment variable not found (running locally?).")

    if space_id_startup:
        print(f"✅ SPACE_ID found: {space_id_startup}")
        print(f"   Repo URL: https://huggingface.co/spaces/{space_id_startup}")
        print(f"   Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
    else:
        print("ℹ️  SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")

    print("🔧 Running startup checks...\n")

    # Check WikipediaLoader: import it and fetch one test page.
    try:
        from langchain_community.document_loaders import WikipediaLoader
        print("✅ WikipediaLoader imported successfully.")

        # Try fetching a test page
        test_docs = WikipediaLoader(query="Alan Turing", load_max_docs=1).load()
        if test_docs and test_docs[0].page_content.strip():
            print("✅ WikipediaLoader can fetch content.\n")
        else:
            print("⚠️ WikipediaLoader returned no content.\n")
    except Exception as e:
        print("❌ WikipediaLoader failed:", e, "\n")

    # Check Google Gemini LLM: only instantiated when an API key is configured.
    try:
        from langchain_google_genai import ChatGoogleGenerativeAI

        if os.getenv("GOOGLE_API_KEY"):
            # The instance is discarded; constructing it is the check.
            ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0)
            print("✅ Google Gemini model instantiated successfully.\n")
        else:
            print("⚠️ GOOGLE_API_KEY not found in environment.\n")
    except Exception as e:
        print("❌ langchain-google-genai or Gemini setup failed:", e, "\n")

    # Closing rule, same width as the "App Starting" banner above.
    print("-"*(60 + len(" App Starting ")) + "\n")
    print("Launching Gradio Interface for Basic Agent Evaluation...")
    demo.launch(debug=True, share=False)