Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from langchain_google_genai import ChatGoogleGenerativeAI | |
| from dotenv import load_dotenv | |
| import os | |
| from langchain_community.document_loaders import WebBaseLoader | |
| from langchain.agents import AgentExecutor, create_react_agent | |
| from langchain import hub | |
| from langchain.tools import tool | |
| import ast | |
| import warnings | |
# Read settings from a local .env file (if present) into the process
# environment before anything below looks them up.
load_dotenv()

# Credentials and model name handed to the chat model.
api_key = os.environ.get("GOOGLE_API_KEY")
model = "gemini-2.5-flash"
def web_scrap_tool(url):
    """Scrape the text content of one or more web pages.

    The input should be a string representation of a Python list of URLs
    (e.g., "['https://example.com']"). A single URL is also accepted, either
    bare (https://example.com) or quoted ('https://example.com'), because
    callers do not always wrap a lone URL in a list.

    Args:
        url: String holding a Python list literal of URL strings, or a
            single URL.

    Returns:
        The text of all scraped pages joined with newlines, in input order.
        A URL that cannot be scraped contributes a "Could not scrape ..."
        line instead of aborting the whole call.

    Raises:
        ValueError: If the input is neither a list of URL strings nor a
            single URL.
    """
    # Tolerate surrounding whitespace/newlines; leave non-strings for
    # literal_eval to reject with its usual ValueError.
    raw = url.strip() if isinstance(url, str) else url

    try:
        parsed = ast.literal_eval(raw)
    except (ValueError, SyntaxError) as e:
        # A bare URL is not a valid Python literal. Accept it as a one-item
        # list as long as it is a single whitespace-free token.
        is_bare_url = (
            isinstance(raw, str)
            and raw.startswith(("http://", "https://"))
            and len(raw.split()) == 1
        )
        if not is_bare_url:
            raise ValueError(
                f"Invalid input format: {e}. Please provide a valid python list of URLs."
            ) from e
        parsed = [raw]

    # A quoted single URL evaluates to a plain string; promote it to a list.
    if isinstance(parsed, str):
        parsed = [parsed]

    if not isinstance(parsed, list) or not all(isinstance(item, str) for item in parsed):
        raise ValueError(
            "Invalid input format: Input must be a list of URLs as strings. "
            "Example: ['https://example.com']. "
            "Please provide a valid python list of URLs."
        )

    combined_content = []
    for page_url in parsed:
        try:
            loader = WebBaseLoader(
                [page_url],
                requests_kwargs={"headers": {"User-Agent": "caramel-AI"}},
            )
            combined_content.extend(doc.page_content for doc in loader.load())
        except Exception as e:
            # Best effort: report the failure for this URL and keep going so
            # one bad page does not discard the others.
            combined_content.append(f"Could not scrape {page_url}. Error: {e}.")
    return "\n".join(combined_content)
# Create a single instance of the agent executor, shared by every request.
llm = ChatGoogleGenerativeAI(model=model, api_key=api_key)

# The agent needs tool objects (with a name and description), not plain
# functions, so wrap the scraper with LangChain's `tool` helper. The
# function's docstring becomes the description shown to the model.
tools = [tool(web_scrap_tool)]

# Standard ReAct prompt template fetched from the LangChain hub.
prompt = hub.pull("hwchase17/react")
agent = create_react_agent(llm, tools, prompt=prompt)

# handle_parsing_errors=True is passed so malformed model output is handled
# by the executor instead of raising.
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=False,
    handle_parsing_errors=True,
)
# Function for Gradio interface
def ask_agent(user_input):
    """Answer a user's question by running it through the agent executor.

    Args:
        user_input: The question text entered in the UI.

    Returns:
        The agent's final answer; a short prompt to enter a question when the
        input is blank or missing; or "Error: ..." if the agent run raises.
    """
    # Skip the agent call entirely for blank input.
    if not user_input or not user_input.strip():
        return "Please enter a question about a webpage."

    try:
        result = agent_executor.invoke({"input": user_input})
        return result["output"]
    except Exception as e:
        # UI boundary: show the failure to the user instead of raising.
        return f"Error: {e}"
# Gradio UI: one textbox for the question, one for the agent's reply.
_question_box = gr.Textbox(
    label="Ask about a webpage",
    placeholder="e.g., What is the content of the 'About Us' page of https://example.com?",
)
_answer_box = gr.Textbox(label="Agent Response")

ui = gr.Interface(
    fn=ask_agent,
    inputs=_question_box,
    outputs=_answer_box,
    title="Web Scraping Agent",
    description="Ask a question about the contents of any webpage. The agent will scrape and respond accordingly.",
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    ui.launch()