Tesvia committed
Commit 47c6176 · verified · 1 Parent(s): 81917a3

Upload 3 files

Files changed (3)
  1. agent.py +77 -0
  2. main.py +66 -0
  3. tools.py +43 -0
agent.py ADDED
@@ -0,0 +1,77 @@
# agent.py
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
from tools import tool_search, tool_calculate, tool_load_file

class CustomAgent:
    def __init__(self, model_name="google/flan-t5-xl", use_gpu=False):
        """Initialize the agent with an LLM (planner) and set up tools and prompt templates."""
        # Load the language model pipeline for text generation (the 'planner' LLM)
        device = 0 if use_gpu else -1
        self.llm = pipeline("text2text-generation", model=model_name, tokenizer=model_name, device=device)
        # Define the system prompt describing the agent and its tools
        self.tool_descriptions = (
            "Available tools:\n"
            "1. search(query) - searches for information about 'query' and returns a summary.\n"
            "2. calculate(expression) - evaluates a mathematical expression and returns the result.\n"
            "3. load_file(task_id) - loads an attached file for the task if any (returns a description or content snippet).\n"
        )
        self.system_message = (
            "You are an AI agent that can use tools to answer questions. "
            "You have the following tools:\n"
            f"{self.tool_descriptions}\n"
            "Follow this format:\n"
            "Thought: (think about the problem step by step)\n"
            "Action: (choose one of the tools and specify input)\n"
            "Observation: (result of the tool will be given)\n"
            "… [this Thought->Action->Observation cycle can repeat] …\n"
            "Thought: (when you have enough information, think the final step)\n"
            "Final Answer: (provide the answer to the user's question)\n"
            "Make sure to output the final answer directly with no extra text.\n"
        )

    def answer(self, question: str) -> str:
        """Generate an answer for the given question by reasoning and using tools as needed."""
        # Initialize the dialogue history with the system instructions and the user question
        dialog = f"{self.system_message}\nUser Question: {question}\n"
        # Accumulate the agent's reasoning in this string as we loop
        agent_thoughts = ""
        for step in range(1, 11):  # limit to 10 steps to avoid infinite loops
            # Prompt the LLM with the conversation so far (system + question + accumulated reasoning)
            prompt = f"{dialog}{agent_thoughts}\nThought:"
            response = self.llm(prompt, max_new_tokens=200, do_sample=False, return_text=True)[0]['generated_text']
            # The LLM is expected to continue from "Thought:" and produce something like:
            # "Thought: ...\nAction: tool_name(...)\n" or "Thought: ...\nFinal Answer: ...\n"
            agent_output = response.strip()
            # Append the LLM output to the accumulated reasoning
            agent_thoughts += agent_output + "\n"
            # Parse the LLM output to see whether an action was proposed or a final answer given
            if "Action:" in agent_output:
                # Extract the tool name and argument from the action line
                try:
                    action_line = agent_output.split("Action:")[1].strip()
                    # e.g. action_line = "search(World War 2)" or "calculate(12*7)"
                    tool_name, arg = action_line.split("(", 1)  # split on the first '(' only
                    tool_name = tool_name.strip()
                    arg = arg.rstrip(")")  # remove the closing parenthesis
                except Exception:
                    return "(Parsing Error: Invalid action format)"
                # Execute the appropriate tool
                if tool_name.lower() == "search":
                    result = tool_search(arg.strip().strip('"\''))
                elif tool_name.lower() == "calculate":
                    result = tool_calculate(arg)
                elif tool_name.lower() == "load_file":
                    result = tool_load_file(arg.strip().strip('"\''))
                else:
                    result = f"(Unknown tool: {tool_name})"
                # Add the observation to the conversation for the next loop iteration
                agent_thoughts += f"Observation: {result}\n"
            elif "Final Answer:" in agent_output:
                # The agent is presenting a final answer – extract and return it
                answer_text = agent_output.split("Final Answer:")[1].strip()
                return answer_text  # return without any "FINAL ANSWER" prefix
            else:
                # Neither Action nor Final Answer was found (the LLM didn't follow the format), so stop
                break
        # If the loop ends without a Final Answer, return a fallback
        return "(No conclusive answer)"
main.py ADDED
@@ -0,0 +1,66 @@
# main.py
import requests
from agent import CustomAgent
from config import HF_USERNAME, QUESTIONS_ENDPOINT, SUBMIT_ENDPOINT, DEFAULT_MODEL

def get_questions():
    """Retrieve the list of evaluation questions from the GAIA Unit4 API."""
    resp = requests.get(QUESTIONS_ENDPOINT, timeout=15)
    resp.raise_for_status()
    questions = resp.json()
    if not isinstance(questions, list):
        raise ValueError("Unexpected response format for questions.")
    return questions

def submit_answers(username, answers_payload):
    """Submit the answers to the GAIA API and return the result data."""
    submission = {
        "username": username.strip(),
        "agent_code": f"https://huggingface.co/spaces/{username}/Final_Assignment_Template/tree/main",
        "answers": answers_payload
    }
    resp = requests.post(SUBMIT_ENDPOINT, json=submission, timeout=60)
    resp.raise_for_status()
    return resp.json()

if __name__ == "__main__":
    # Initialize our custom agent (you can change the model or settings if needed)
    agent = CustomAgent(model_name=DEFAULT_MODEL, use_gpu=False)
    print("Agent initialized with model:", DEFAULT_MODEL)
    # Fetch the evaluation questions
    try:
        questions = get_questions()
    except Exception as e:
        print("Error fetching questions:", e)
        exit(1)
    print(f"Retrieved {len(questions)} questions for evaluation.")
    # Run the agent on each question
    answers_payload = []
    for item in questions:
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or not question_text:
            continue  # skip if malformed
        print(f"\n=== Question {task_id} ===")
        print("Q:", question_text)
        try:
            ans = agent.answer(question_text)
        except Exception as err:
            ans = "(Agent failed to produce an answer)"
            print("Error during agent reasoning:", err)
        print("A:", ans)
        answers_payload.append({"task_id": task_id, "submitted_answer": ans})
    # All answers are ready; submit them for scoring
    try:
        result = submit_answers(HF_USERNAME, answers_payload)
    except Exception as e:
        print("Submission failed:", e)
        exit(1)
    # Print the results
    score = result.get('score', 'N/A')
    correct = result.get('correct_count', '?')
    total = result.get('total_attempted', '?')
    message = result.get('message', '')
    print(f"\nSubmission complete! Score: {score}% ({correct}/{total} correct)")
    if message:
        print("Message from server:", message)
tools.py ADDED
@@ -0,0 +1,43 @@
# tools.py
import math
import requests
import wikipedia  # using the Wikipedia API for a search tool

# Tool 1: Wikipedia Search
def tool_search(query: str) -> str:
    """Search Wikipedia for the query and return a short summary of the top result."""
    try:
        # Use the wikipedia library to get a two-sentence summary
        summary = wikipedia.summary(query, sentences=2)
        return summary
    except Exception as e:
        return f"(Search tool failed: {e})"

# Tool 2: Calculator
def tool_calculate(expression: str) -> str:
    """Evaluate a mathematical expression and return the result as a string."""
    try:
        # Restricted eval: builtins disabled, only a few math helpers exposed
        result = eval(expression, {"__builtins__": None}, {"sqrt": math.sqrt, "pow": math.pow})
        return str(result)
    except Exception as e:
        return f"(Calculation error: {e})"

# Tool 3: File loader (for image or text files from GAIA, if needed)
def tool_load_file(task_id: str) -> str:
    """Fetch the file for a given task (if any) and return its content or a description."""
    url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
    try:
        resp = requests.get(url, timeout=10)
        resp.raise_for_status()
    except Exception as e:
        return f"(File download error: {e})"
    # Determine the content type
    content_type = resp.headers.get("Content-Type", "")
    if "image" in content_type:
        # An image was received (an image-captioning model could be run here)
        return "[Image received from task]"
    elif "text" in content_type or "json" in content_type:
        text_data = resp.text[:500]  # take the first 500 chars to avoid huge text
        return f"[File content snippet: {text_data}]"
    else:
        return "(Unknown file type or binary data received)"