DiegoSanC committed
Commit 144c032 · 1 Parent(s): d4598ef

feat: Add solution based on smolagents

Files changed (5)
  1. .gitignore +3 -1
  2. README.md +3 -1
  3. agent.py +25 -137
  4. app.py +3 -12
  5. requirements.txt +8 -7
.gitignore CHANGED
@@ -1 +1,3 @@
-.env
+.env
+./conda-env/*
+conda-env/*
README.md CHANGED
@@ -12,4 +12,6 @@ hf_oauth: true
 hf_oauth_expiration_minutes: 480
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+Inside the "langgraph-wip" folder there is another README.md explaining the rationale of that folder.
agent.py CHANGED
@@ -1,11 +1,8 @@
 from dotenv import load_dotenv
-from langchain_core.messages import SystemMessage
-from langchain_openai import ChatOpenAI
-from langgraph.graph import StateGraph, START, MessagesState
-from langgraph.prebuilt import ToolNode, tools_condition
-from langchain_core.tools import tool
-from langchain_community.document_loaders import WebBaseLoader, WikipediaLoader, ArxivLoader
+from smolagents import tool, PythonInterpreterTool, DuckDuckGoSearchTool, WikipediaSearchTool, VisitWebpageTool, HfApiModel, GoogleSearchTool, ToolCallingAgent, CodeAgent, LiteLLMModel
 from youtube_transcript_api import YouTubeTranscriptApi
+import os
+from typing import Dict
 
 load_dotenv()
 
@@ -82,144 +79,35 @@ def modulo(a: int, b: int) -> int:
     return a % b
 
 @tool
-def wikipedia_search(query: str) -> str:
-    """
-    Search Wikipedia for information
-    Args:
-        query: The query to search for
-    Returns:
-        The search results
-    """
-    docs_found = WikipediaLoader(query=query, load_max_docs=5).load()
-    # format the docs found into a string keeping just first paragraph
-    formatted_results = []
-
-    for i, doc in enumerate(docs_found, 1):
-        source = doc.metadata.get('source', 'Unknown source')
-        title = doc.metadata.get('title', 'Untitled')
-
-        # Get the first paragraph (split by \n\n and take first part)
-        content = doc.page_content.strip()
-        first_paragraph = content.split('\n\n')[0] if content else "No content available"
-
-        formatted_doc = f"""--- DOCUMENT {i} START ---
-Source: {source}
-Title: {title}
-Content: {first_paragraph}
---- DOCUMENT {i} END ---"""
-
-        formatted_results.append(formatted_doc)
-
-    return "\n\n".join(formatted_results)
-
-@tool
-def arxiv_search(query: str) -> str:
-    """
-    Search ArXiv for research papers
-    Args:
-        query: The query to search for
-    Returns:
-        The search results with abstracts
-    """
-    docs_found = ArxivLoader(query=query, load_max_docs=3).load()
-    formatted_results = []
-
-    for i, doc in enumerate(docs_found, 1):
-        source = doc.metadata.get('source', 'Unknown source')
-        title = doc.metadata.get('title', 'Untitled')
-
-        # For ArXiv, the abstract is typically in the page_content or metadata
-        abstract = doc.page_content.strip() if doc.page_content else "No abstract available"
-
-        formatted_doc = f"""--- DOCUMENT {i} START ---
-Source: {source}
-Title: {title}
-Abstract: {abstract}
---- DOCUMENT {i} END ---"""
-
-        formatted_results.append(formatted_doc)
-
-    return "\n\n".join(formatted_results)
-
-@tool
-def web_search(query: str) -> str:
-    """
-    Search the web for information
-    Args:
-        query: The query to search for (should be a list of URLs or single URL)
-    Returns:
-        The search results with first 1000 characters
-    """
-    # Note: WebBaseLoader requires URLs, so this assumes query contains URLs
-    # For a more general web search, you'd need a different approach like SerpAPI
-    try:
-        if isinstance(query, str):
-            urls = [query] if query.startswith('http') else []
-        else:
-            urls = query
-
-        if not urls:
-            return "No valid URLs provided for web search."
-
-        # Limit to 4 URLs maximum
-        urls = urls[:4]
-        docs_found = WebBaseLoader(urls).load()
-        formatted_results = []
-
-        for i, doc in enumerate(docs_found, 1):
-            source = doc.metadata.get('source', 'Unknown source')
-            title = doc.metadata.get('title', 'Untitled')
-
-            # Get first 1000 characters of content
-            content = doc.page_content.strip()
-            first_1000_chars = content[:1000] if content else "No content available"
-            if len(content) > 1000:
-                first_1000_chars += "..."
-
-            formatted_doc = f"""--- DOCUMENT {i} START ---
-Source: {source}
-Title: {title}
-Content: {first_1000_chars}
---- DOCUMENT {i} END ---"""
-
-            formatted_results.append(formatted_doc)
-
-        return "\n\n".join(formatted_results)
-
-    except Exception as e:
-        return f"Error during web search: {str(e)}"
-
-@tool
-def youtube_transcript(url: str) -> str:
+def youtube_transcript(url: str) -> Dict[str, str]:
     """
     Get transcript of YouTube video.
     Args:
        url: YouTube video url in ""
+    Returns:
+        Transcript of the YouTube video
     """
     video_id = url.partition("https://www.youtube.com/watch?v=")[2]
     transcript = YouTubeTranscriptApi.get_transcript(video_id)
     transcript_text = " ".join([item["text"] for item in transcript])
     return {"youtube_transcript": transcript_text}
 
-tools = [add, subtract, multiply,
-         divide, modulo, wikipedia_search,
-         arxiv_search, web_search, youtube_transcript]
-
-system_message = SystemMessage(content=system_prompt)
-
-def build_graph(provider: str = "openai"):
-    llm = ChatOpenAI(model="gpt-4o", temperature=0, max_retries=2)
-    llm_with_tools = llm.bind_tools(tools)
-
-    def assistant(state: MessagesState):
-        """ Assistant node that will receive a question and return an answer """
-        return {"messages": [llm_with_tools.invoke([system_message] + state["messages"])]}
-
-    builder = StateGraph(MessagesState)
-    builder.add_node("assistant", assistant)
-    builder.add_node("tools", ToolNode(tools))
-    builder.add_edge(START, "assistant")
-    builder.add_conditional_edges("assistant", tools_condition)
-    builder.add_edge("tools", "assistant")
-
-    return builder.compile()
+class BasicSmolAgent:
+    def __init__(self):
+        self.api_key = os.getenv("OPENAI_API_KEY")
+        self.model = LiteLLMModel(model_id="openai/o4-mini", api_key=self.api_key)
+        self.agent = CodeAgent(
+            tools=[
+                add, subtract, multiply, divide, modulo,
+                youtube_transcript,
+                DuckDuckGoSearchTool(),
+                WikipediaSearchTool(),
+                VisitWebpageTool(),
+                GoogleSearchTool(),
+            ],
+            model=self.model
+        )
+
+    def __call__(self, question: str) -> str:
+        print(f"Question: {question}")
+        return self.agent.run(question)
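For a quick local check of the new agent outside the Gradio app, something like the sketch below can be used. It is not part of this commit; it assumes OPENAI_API_KEY is available in .env (agent.py calls load_dotenv() and passes the key to LiteLLMModel), that whatever search-provider key GoogleSearchTool expects is also configured, and the question string is only an example.

    # Hypothetical local smoke test -- not part of this commit.
    from agent import BasicSmolAgent

    agent = BasicSmolAgent()                  # builds the CodeAgent with the tool list above
    answer = agent("What is (17 * 3) % 5?")   # __call__ delegates to CodeAgent.run()
    print(answer)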
 
 
 
app.py CHANGED
@@ -3,23 +3,14 @@ import gradio as gr
 import requests
 import inspect
 import pandas as pd
-from agent import build_graph
-from langchain_core.messages import HumanMessage
+from agent import BasicSmolAgent
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 
 # --- Basic Agent Definition ---
 # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
-class BasicAgent:
-    def __init__(self):
-        print("BasicAgent initialized.")
-        self.graph = build_graph()
-    def __call__(self, question: str) -> str:
-        print(f"Agent received question (first 50 chars): {question[:50]}...")
-        messages = [HumanMessage(content=question)]
-        response = self.graph.invoke({"messages": messages})
-        return response["messages"][-1].content
+
 
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
@@ -42,7 +33,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
 
     # 1. Instantiate Agent ( modify this part to create your agent)
    try:
-        agent = BasicAgent()
+        agent = BasicSmolAgent()
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None
requirements.txt CHANGED
@@ -1,14 +1,15 @@
 gradio
 requests
 python-dotenv
-langchain
-langchain-core
-langchain-community
-langchain-tavily
-langchain-google-genai
-langchain-openai
+# langchain
+# langchain-core
+# langchain-community
+# langchain-tavily
+# langchain-google-genai
+# langchain-openai
 langgraph
 wikipedia
 arxiv
 youtube_transcript_api
-httpx
+httpx
+smolagents[litellm]
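With the LangChain packages commented out and smolagents[litellm] added, a small pre-flight script can confirm the environment matches what agent.py now expects. A minimal sketch, assuming the same .env conventions as agent.py; the extra key that GoogleSearchTool may require depends on its configured provider, so it is not checked here.

    # Hypothetical pre-flight check -- not part of this commit.
    import os
    from dotenv import load_dotenv

    load_dotenv()

    if not os.getenv("OPENAI_API_KEY"):
        raise SystemExit("OPENAI_API_KEY missing: LiteLLMModel(model_id='openai/o4-mini') cannot authenticate.")

    import smolagents  # fails fast if smolagents[litellm] was not installed
    print("smolagents importable; OPENAI_API_KEY present.")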