zzx committed on
Commit
90193e1
·
1 Parent(s): 2d9ff1b

commit all files

Browse files
Files changed (3) hide show
  1. z_http_request.py +31 -0
  2. z_search.py +63 -0
  3. zex.py +108 -0
z_http_request.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import html2text
2
+ import requests
3
+
4
def get_http_request(url: str, params: dict = None) -> str:
    """Use this http request tool to visit a website by its http url and get more information.

    Sends an HTTP GET request to the given url and returns the page content
    converted from HTML to Markdown-style text (images and links are left
    out). Pages longer than 28000 characters are cut and marked with
    "...[truncated]".

    Args:
        url: The http url to visit.
        params: Optional query-string parameters to send with the request.

    Returns:
        The converted page text, or a message starting with
        "http get_request error:" when the request fails.
    """
    max_return_char = 28000
    # requests applies no timeout by default; bound the wait so a single
    # unresponsive host cannot block the caller forever. A timeout raises a
    # RequestException subclass, so it is reported like any other failure.
    request_timeout = 30
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36"
    }

    try:
        response = requests.get(
            url,
            params=params,
            headers=headers,
            timeout=request_timeout,
        )
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        return f"http get_request error: {e}"

    converter = html2text.HTML2Text()
    converter.ignore_images = True
    converter.ignore_links = True
    # converter.ignore_tables = True
    markdown_text = converter.handle(response.text)

    if len(markdown_text) > max_return_char:
        print(f"\n http result size is : {len(markdown_text)}. try to truncate it\n")
        markdown_text = markdown_text[:max_return_char] + "...[truncated]"
    return markdown_text
27
+
28
+
29
if __name__ == "__main__":
    # Manual smoke test: fetch one page and print the converted text.
    print(get_http_request(url="https://www.orczhou.com"))
z_search.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from googleapiclient.discovery import build
3
+ from ddgs import DDGS
4
+
5
# Credentials for the Google Custom Search call below; both are read from the
# environment once at import time and are None when the variable is not set.
GOOGLE_CSE_ID = os.environ.get("GOOGLE_CSE_ID")
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
7
+
8
def tool_google_search(query: str):
    """
    This tool is a web search utility that leverages the Google Custom Search API.
    Its primary purpose is to allow a large language model (LLM) to retrieve real-time, up-to-date information from the internet.

    Args:
        query (str): search string

    Returns:
        string: top 3 related searching results (title, summary and url of
        each one), "no search result." when nothing is found, or a message
        starting with "google search error:" when the search call fails.
    """
    num_results = 3
    try:
        service = build("customsearch", "v1", developerKey=GOOGLE_API_KEY)
        res = service.cse().list(
            q=query,
            cx=GOOGLE_CSE_ID,
            num=num_results,
        ).execute()
    except Exception as e:
        # Tool boundary: report the failure to the caller as text instead of
        # raising or returning an empty string that looks like "no output".
        print(f"something is wrong: {e}")
        return f"google search error: {e}"

    # A missing or empty "items" entry both mean there is nothing to show.
    search_results = res.get("items")
    if not search_results:
        return "no search result."

    parts = []
    for i, item in enumerate(search_results, 1):
        # A result may lack one of these fields; fall back to an empty string
        # so one incomplete entry does not discard every other result.
        title = item.get("title") or ""
        snippet = item.get("snippet") or ""
        link = item.get("link") or ""
        parts.append(f"\n search result title {i}:" + title + "\n")
        parts.append(f"\n search result summary {i}: " + snippet + "\n")
        parts.append("\n get more information with this url: " + link + " \n\n")
    return "".join(parts)
44
+
45
def search_information_from_web(query: str) -> str:
    """This tool is a web search utility that leverages the DuckDuckGo search engine.

    Its primary purpose is to allow a large language model (LLM) to retrieve
    real-time, up-to-date information from the internet.

    Args:
        query: search string

    Returns:
        Up to 3 search results (summary and url of each one),
        "no search result." when nothing is found, or a message starting with
        "web search error:" when the search call fails.
    """
    try:
        results = DDGS().text(query, max_results=3)
    except Exception as e:
        # Tool boundary: a failed search is reported as text so the caller
        # gets an answer instead of an unhandled exception.
        return f"web search error: {e}"

    if not results:
        return "no search result."

    # .get() keeps one incomplete result from aborting the whole listing.
    return "".join(
        f"\n search result summary {i}: {result.get('body', '')}"
        f"\n get more information with this url: {result.get('href', '')} \n\n"
        for i, result in enumerate(results, 1)
    )
59
+
60
if __name__ == "__main__":
    # Manual smoke test: run one Google search and print the formatted results.
    print(tool_google_search("who is orczhou"))
zex.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # version 0.3
2
+
3
+ from typing_extensions import TypedDict
4
+ from typing import List, TypedDict, Annotated, Optional
5
+ from langgraph.prebuilt import ToolNode, tools_condition
6
+ from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage
7
+ from langchain_openai import ChatOpenAI
8
+ from langgraph.graph.message import add_messages
9
+ from langchain_core.messages import SystemMessage, HumanMessage
10
+ from langgraph.graph import START, StateGraph
11
+ import os
12
+
13
+ ############# state #############
14
class AgentState(TypedDict):
    """State dictionary passed between the nodes of the agent graph."""

    # Task metadata. NOTE(review): presumably filled in by the caller from the
    # GAIA task record; only `question` is read by the code in this file.
    task_id: str
    question: str
    file_name: str
    file_url: str
    # Conversation so far; updates to this key go through the add_messages reducer.
    messages: Annotated[list[AnyMessage], add_messages]
20
+
21
builder = StateGraph(AgentState)

####### tools and tools node #######

from z_http_request import get_http_request
from z_search import tool_google_search, search_information_from_web

# Tools offered to the model: Google search plus the page fetcher.
# The commented alternative below uses the DuckDuckGo-based search instead.
tools = [
    tool_google_search,
    get_http_request,
]
# tools = [search_information_from_web,get_http_request]

# Graph node that executes whichever tool the model asked for.
builder.add_node("tools", ToolNode(tools))

############# LLM instance ##########
# Other models that were tried:
# model = "gpt-o3"
# model = "gpt-4o"
model = "gpt-4.1"
OPENAI_KEY = os.getenv("OPENAI_KEY")
llm = ChatOpenAI(model=model, api_key=OPENAI_KEY)
# Same model with the tools attached; parallel tool calls are switched off.
llm_with_tools = llm.bind_tools(tools, parallel_tool_calls=False)
40
+
41
+ ############# the only node ##########
42
+ # node analysis
43
+
44
def analysis(state: AgentState):
    """Run one LLM turn of the agent and return the resulting state update.

    On the first call (empty message history) the question from the state is
    turned into a HumanMessage that starts the conversation. The tool-enabled
    model is then invoked with the system prompt plus the conversation, and is
    retried with a doubling delay when the API reports a rate limit.

    Args:
        state: Current graph state; reads "messages" and, on the first call,
            "question".

    Returns:
        A state update {"messages": [...]} holding the model response,
        preceded by the seeded HumanMessage on the first call.

    Raises:
        RateLimitError: when the model call is still rate limited after
            3 attempts.
    """
    import time
    from openai import RateLimitError

    # Messages produced by this turn. They are handed back through the return
    # value instead of being appended to state["messages"] in place, so the
    # graph records them through the regular state-update path.
    new_messages = []
    if not state["messages"]:
        # first time call analysis
        # no extra file at all
        new_messages.append(HumanMessage(content=f"{state['question']}"))
        print(state)
    conversation = state["messages"] + new_messages

    # The prompt text is sent to the model verbatim and is kept unchanged.
    # NOTE(review): it mentions an extract_text tool, but the tools list bound
    # in this file only contains the search and http request tools - confirm.
    sys_prompt = """You solve GAIA Level-1 tasks.
Return ONLY the final answer string (no extra words).Match requested format exactly.
Here are some example:
question: 3+8=? ; answer is : 11 (note: answer is not "3+8=11")
question: who is the president of USA? ; answer is : Donult Trump (note: answer is not "the president of USA is Donult Trump")
if the question is like "how many ...", just answer the number ONLY
if the question is like "where ...", just answer the location ONLY
Use web search tool only when needed.
Use http request tool if nessesary to get more detailed information after the searching
Use extract_text tool to get text from a image if there is a image as an attachment
if the question is writter in other language, directly answer it with the language.
when writing a comma-delimited list, you should format it as item1, item2, item3—not item1,item2,item3

You do not know what date is today. So, if there is any question about date,try to search result for it.

Strip leading/trailing whitespace from the final answer.
"""
    sys_msg = SystemMessage(content=sys_prompt)

    max_retries = 3
    retry_delay = 3  # seconds; doubled after every rate-limited attempt

    for attempt in range(max_retries):
        try:
            response = llm_with_tools.invoke([sys_msg] + conversation)
            break
        except RateLimitError as e:
            print(f"try {attempt + 1} failed:{e}")
            if attempt == max_retries - 1:
                # Out of attempts: let the rate-limit error reach the caller.
                print("max retry times")
                raise
            print(f"wait {retry_delay} seconds ...")
            time.sleep(retry_delay)
            print("let's try again ...")
            retry_delay *= 2

    return {"messages": new_messages + [response]}
94
+
95
builder.add_node("analysis", analysis)

############ edges ################
# Every run starts at the analysis node.
builder.add_edge(START, "analysis")
# If the latest message requires a tool, route to tools;
# otherwise, provide a direct response.
builder.add_conditional_edges("analysis", tools_condition)
# Tool output always goes back to analysis for the next turn.
builder.add_edge("tools", "analysis")

######### compile the agent ##########
zex_vs_gaia_graph = builder.compile()