Final_Assignment_Template

Sleeping

App Files Files Community

DiegoSanC committed on Jun 25

Commit

144c032

1 Parent(s): d4598ef

feat: Add solution based on smolagents

Browse files

Files changed (5) hide show

.gitignore +3 -1
README.md +3 -1
agent.py +25 -137
app.py +3 -12
requirements.txt +8 -7

.gitignore CHANGED Viewed

	@@ -1 +1,3 @@
1	- .env

+.env
+./conda-env/*
+conda-env/*

README.md CHANGED Viewed

@@ -12,4 +12,6 @@ hf_oauth: true
 hf_oauth_expiration_minutes: 480
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 hf_oauth_expiration_minutes: 480
 ---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+Inside the "langgraph-wip" folder there is another README.md explaining the rationale for that folder.

agent.py CHANGED Viewed

@@ -1,11 +1,8 @@
 from dotenv import load_dotenv
-from langchain_core.messages import SystemMessage
-from langchain_openai import ChatOpenAI
-from langgraph.graph import StateGraph, START, MessagesState
-from langgraph.prebuilt import ToolNode, tools_condition
-from langchain_core.tools import tool
-from langchain_community.document_loaders import WebBaseLoader, WikipediaLoader, ArxivLoader
 from youtube_transcript_api import YouTubeTranscriptApi
 load_dotenv()
@@ -82,144 +79,35 @@ def modulo(a: int, b: int) -> int:
     return a % b
 @tool
-def wikipedia_search(query: str) -> str:
-    """
-    Search Wikipedia for information
-    Args:
-        query: The query to search for
-    Returns:
-        The search results
-    """
-    docs_found = WikipediaLoader(query=query, load_max_docs=5).load()
-    # format the docs found into a string keeping just first paragraph
-    formatted_results = []
-    for i, doc in enumerate(docs_found, 1):
-        source = doc.metadata.get('source', 'Unknown source')
-        title = doc.metadata.get('title', 'Untitled')
-        # Get the first paragraph (split by \n\n and take first part)
-        content = doc.page_content.strip()
-        first_paragraph = content.split('\n\n')[0] if content else "No content available"
-        formatted_doc = f"""--- DOCUMENT {i} START ---
-Source: {source}
-Title: {title}
-Content: {first_paragraph}
---- DOCUMENT {i} END ---"""
-        formatted_results.append(formatted_doc)
-    return "\n\n".join(formatted_results)
-@tool
-def arxiv_search(query: str) -> str:
-    """
-    Search ArXiv for research papers
-    Args:
-        query: The query to search for
-    Returns:
-        The search results with abstracts
-    """
-    docs_found = ArxivLoader(query=query, load_max_docs=3).load()
-    formatted_results = []
-    for i, doc in enumerate(docs_found, 1):
-        source = doc.metadata.get('source', 'Unknown source')
-        title = doc.metadata.get('title', 'Untitled')
-        # For ArXiv, the abstract is typically in the page_content or metadata
-        abstract = doc.page_content.strip() if doc.page_content else "No abstract available"
-        formatted_doc = f"""--- DOCUMENT {i} START ---
-Source: {source}
-Title: {title}
-Abstract: {abstract}
---- DOCUMENT {i} END ---"""
-        formatted_results.append(formatted_doc)
-    return "\n\n".join(formatted_results)
-@tool
-def web_search(query: str) -> str:
-    """
-    Search the web for information
-    Args:
-        query: The query to search for (should be a list of URLs or single URL)
-    Returns:
-        The search results with first 1000 characters
-    """
-    # Note: WebBaseLoader requires URLs, so this assumes query contains URLs
-    # For a more general web search, you'd need a different approach like SerpAPI
-    try:
-        if isinstance(query, str):
-            urls = [query] if query.startswith('http') else []
-        else:
-            urls = query
-        if not urls:
-            return "No valid URLs provided for web search."
-        # Limit to 4 URLs maximum
-        urls = urls[:4]
-        docs_found = WebBaseLoader(urls).load()
-        formatted_results = []
-        for i, doc in enumerate(docs_found, 1):
-            source = doc.metadata.get('source', 'Unknown source')
-            title = doc.metadata.get('title', 'Untitled')
-            # Get first 1000 characters of content
-            content = doc.page_content.strip()
-            first_1000_chars = content[:1000] if content else "No content available"
-            if len(content) > 1000:
-                first_1000_chars += "..."
-            formatted_doc = f"""--- DOCUMENT {i} START ---
-Source: {source}
-Title: {title}
-Content: {first_1000_chars}
---- DOCUMENT {i} END ---"""
-            formatted_results.append(formatted_doc)
-        return "\n\n".join(formatted_results)
-    except Exception as e:
-        return f"Error during web search: {str(e)}"
-@tool
-def youtube_transcript(url: str) -> str:
     """
     Get transcript of YouTube video.
     Args:
         url: YouTube video url in ""
     """
     video_id = url.partition("https://www.youtube.com/watch?v=")[2]
     transcript = YouTubeTranscriptApi.get_transcript(video_id)
     transcript_text = " ".join([item["text"] for item in transcript])
     return {"youtube_transcript": transcript_text}
-tools = [add, subtract, multiply,
-         divide, modulo, wikipedia_search,
-         arxiv_search, web_search, youtube_transcript]
-system_message = SystemMessage(content=system_prompt)
-def build_graph(provider: str = "openai"):
-    llm = ChatOpenAI(model="gpt-4o", temperature=0, max_retries=2)
-    llm_with_tools = llm.bind_tools(tools)
-    def assistant(state: MessagesState):
-        """ Assistant node that will receive a question and return an answer """
-        return {"messages": [llm_with_tools.invoke([system_message] + state["messages"])]}
-    builder = StateGraph(MessagesState)
-    builder.add_node("assistant", assistant)
-    builder.add_node("tools", ToolNode(tools))
-    builder.add_edge(START, "assistant")
-    builder.add_conditional_edges("assistant", tools_condition)
-    builder.add_edge("tools", "assistant")
-    return builder.compile()

 from dotenv import load_dotenv
+from smolagents import tool, PythonInterpreterTool, DuckDuckGoSearchTool, WikipediaSearchTool, VisitWebpageTool, HfApiModel, GoogleSearchTool, ToolCallingAgent, CodeAgent, LiteLLMModel
 from youtube_transcript_api import YouTubeTranscriptApi
+import os
+from typing import Dict
 load_dotenv()
     return a % b
 @tool
+def youtube_transcript(url: str) -> Dict[str, str]:
     """
     Get transcript of YouTube video.
     Args:
         url: YouTube video url in ""
+    Returns:
+        Transcript of the YouTube video
     """
     video_id = url.partition("https://www.youtube.com/watch?v=")[2]
     transcript = YouTubeTranscriptApi.get_transcript(video_id)
     transcript_text = " ".join([item["text"] for item in transcript])
     return {"youtube_transcript": transcript_text}
+class BasicSmolAgent:
+    def __init__(self):
+        self.api_key = os.getenv("OPENAI_API_KEY")
+        self.model = LiteLLMModel(model_id="openai/o4-mini", api_key=self.api_key)
+        self.agent = CodeAgent(
+            tools=[
+                add, subtract, multiply, divide, modulo,
+                youtube_transcript,
+                DuckDuckGoSearchTool(),
+                WikipediaSearchTool(),
+                VisitWebpageTool(),
+                GoogleSearchTool(),
+            ],
+            model=self.model
+        )
+    def __call__(self, question: str) -> str:
+        print(f"Question: {question}")
+        return self.agent.run(question)

app.py CHANGED Viewed

@@ -3,23 +3,14 @@ import gradio as gr
 import requests
 import inspect
 import pandas as pd
-from agent import build_graph
-from langchain_core.messages import HumanMessage
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # --- Basic Agent Definition ---
 # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
-class BasicAgent:
-    def __init__(self):
-        print("BasicAgent initialized.")
-        self.graph = build_graph()
-    def __call__(self, question: str) -> str:
-        print(f"Agent received question (first 50 chars): {question[:50]}...")
-        messages = [HumanMessage(content=question)]
-        response = self.graph.invoke({"messages": messages})
-        return response["messages"][-1].content
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
@@ -42,7 +33,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
     # 1. Instantiate Agent ( modify this part to create your agent)
     try:
-        agent = BasicAgent()
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None

 import requests
 import inspect
 import pandas as pd
+from agent import BasicSmolAgent
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # --- Basic Agent Definition ---
 # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
 def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
     # 1. Instantiate Agent ( modify this part to create your agent)
     try:
+        agent = BasicSmolAgent()
     except Exception as e:
         print(f"Error instantiating agent: {e}")
         return f"Error initializing agent: {e}", None

requirements.txt CHANGED Viewed

@@ -1,14 +1,15 @@
 gradio
 requests
 python-dotenv
-langchain
-langchain-core
-langchain-community
-langchain-tavily
-langchain-google-genai
-langchain-openai
 langgraph
 wikipedia
 arxiv
 youtube_transcript_api
-httpx

 gradio
 requests
 python-dotenv
+# langchain
+# langchain-core
+# langchain-community
+# langchain-tavily
+# langchain-google-genai
+# langchain-openai
 langgraph
 wikipedia
 arxiv
 youtube_transcript_api
+httpx
+smolagents[litellm]