Spaces:
Sleeping
Sleeping
| import os | |
| from dotenv import load_dotenv | |
| from langgraph.graph import START, StateGraph, MessagesState | |
| from langgraph.prebuilt import tools_condition | |
| from langgraph.prebuilt import ToolNode | |
| from langchain_groq import ChatGroq | |
| from langchain_community.document_loaders import WikipediaLoader | |
| from langchain_community.document_loaders import ArxivLoader | |
| from langchain_core.messages import SystemMessage, HumanMessage | |
| from langchain_core.tools import tool | |
| import requests | |
| from bs4 import BeautifulSoup | |
| import urllib.parse | |
| load_dotenv() | |
def wiki_search(query: str) -> str:
    """Search Wikipedia for information.
    Args:
        query: The search query."""
    # NOTE(review): this function is listed in `tools` and handed to
    # bind_tools/ToolNode as a plain callable; LangChain presumably derives the
    # tool description shown to the model from the docstring above, so its
    # wording is deliberately left unchanged.
    try:
        # Load at most two matching articles.
        search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
        # One <Document> element per article, separated by a horizontal rule.
        formatted_search_docs = "\n\n---\n\n".join(
            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
            for doc in search_docs
        )
        # Return the text itself rather than a {"wiki_results": ...} dict: the
        # signature promises `str` and the error path below already returns a
        # string, so both paths now yield the same type.
        return formatted_search_docs
    except Exception as e:
        # Best-effort tool: report the failure as text instead of raising so
        # the caller always receives a string.
        return f"Error searching Wikipedia: {str(e)}"
def web_search(query: str) -> str:
    """Search the web using DuckDuckGo.
    Args:
        query: The search query."""
    # NOTE(review): this function is listed in `tools` and handed to
    # bind_tools/ToolNode as a plain callable; LangChain presumably derives the
    # tool description from the docstring and the tool arguments from the
    # signature, so both are left unchanged (the timeout below is a local
    # constant rather than a new parameter for that reason).
    request_timeout_seconds = 10
    max_results = 3
    try:
        encoded_query = urllib.parse.quote(query)
        url = f"https://html.duckduckgo.com/html/?q={encoded_query}"
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        # Without a timeout, requests waits indefinitely on a stalled
        # connection, which would hang the whole agent run.
        response = requests.get(url, headers=headers, timeout=request_timeout_seconds)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        results = []
        for result in soup.find_all('div', class_='result__body'):
            title = result.find('h2', class_='result__title')
            snippet = result.find('a', class_='result__snippet')
            # Skip entries that lack either a title or a snippet.
            if title and snippet:
                results.append(f"Title: {title.get_text()}\nSnippet: {snippet.get_text()}")
            if len(results) >= max_results:
                break
        # Return the text itself rather than a {"web_results": ...} dict: the
        # signature promises `str` and the error path below already returns a
        # string, so both paths now yield the same type.
        return "\n\n".join(results) if results else "No results found"
    except Exception as e:
        # Best-effort tool: report the failure (including timeouts and HTTP
        # errors) as text instead of raising.
        return f"Error searching web: {str(e)}"
def arxiv_search(query: str) -> str:
    """Search Arxiv for scientific papers.
    Args:
        query: The search query."""
    # NOTE(review): this function is listed in `tools` and handed to
    # bind_tools/ToolNode as a plain callable; LangChain presumably derives the
    # tool description shown to the model from the docstring above, so its
    # wording is deliberately left unchanged.
    try:
        # Load at most two matching papers.
        search_docs = ArxivLoader(query=query, load_max_docs=2).load()
        # One <Document> element per paper; only the first 1000 characters of
        # each paper's content are included.
        formatted_search_docs = "\n\n---\n\n".join(
            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
            for doc in search_docs
        )
        # Return the text itself rather than an {"arxiv_results": ...} dict:
        # the signature promises `str` and the error path below already
        # returns a string, so both paths now yield the same type.
        return formatted_search_docs
    except Exception as e:
        # Best-effort tool: report the failure as text instead of raising so
        # the caller always receives a string.
        return f"Error searching Arxiv: {str(e)}"
# System prompt: instructions describing the answer format the assistant must
# follow (bare numbers, minimal text, comma-separated lists, YYYY-MM-DD dates,
# no "FINAL ANSWER" markers, ...). This is runtime text; edit with care.
system_prompt = """You are a highly accurate question-answering assistant. Your task is to provide precise, direct answers to questions.
Key Rules:
1. Answer Format:
- For numbers: Provide only the number without units, commas, or formatting
- For text: Use minimal words, no articles or abbreviations
- For lists: Use comma-separated values without additional formatting
- For dates: Use YYYY-MM-DD format unless specified otherwise
- For names: Use full names without titles or honorifics
- For country codes: Use official IOC codes (3 letters)
- For chess moves: Use standard algebraic notation
- For currency: Use numbers only, no symbols
2. Answer Guidelines:
- Be extremely precise and direct
- Do not include any explanatory text
- Do not use phrases like "FINAL ANSWER" or any markers
- Do not include units unless explicitly requested
- Do not use abbreviations unless they are standard (e.g., DNA, RNA)
- For multiple choice: Provide only the letter or number of the correct answer
- For reversed text: Provide the answer in normal text
- For file-based questions: Focus on the specific information requested
3. Error Handling:
- If uncertain, provide the most likely answer based on available information
- If completely unsure, provide a reasonable default rather than an error message
- For file processing errors, indicate the specific issue
4. Special Cases:
- For mathematical questions: Provide the exact numerical result
- For historical dates: Use the most widely accepted date
- For scientific terms: Use the standard scientific notation
- For geographical locations: Use official names without abbreviations
- For audio/video questions: Focus on the specific detail requested"""
# System message: the prompt above wrapped in a LangChain SystemMessage.
sys_msg = SystemMessage(content=system_prompt)
# Search callables made available to the model; build_graph() binds this list
# to the LLM and wraps it in a ToolNode.
tools = [wiki_search, web_search, arxiv_search]
def build_graph():
    """Build and compile the question-answering agent graph.

    The graph has two nodes: "assistant", which calls the Groq-hosted chat
    model with the module-level `tools` bound to it, and "tools", a ToolNode
    over the same list. Execution starts at "assistant"; routing after each
    assistant turn is delegated to `tools_condition`, and every pass through
    "tools" returns to "assistant".

    Returns:
        The compiled graph, as produced by `StateGraph.compile()`.
    """
    # Initialize Groq LLM
    llm = ChatGroq(
        model="meta-llama/llama-4-maverick-17b-128e-instruct",
        temperature=0.1,
    )
    # Bind tools to LLM
    llm_with_tools = llm.bind_tools(tools)

    # Node
    def assistant(state: MessagesState):
        """Assistant node: call the model on the conversation so far."""
        messages = state["messages"]
        # The module-level system prompt was previously built but never sent
        # to the model. Prepend it to the model input unless the caller has
        # already placed a SystemMessage first, so the prompt is not sent
        # twice. It is added to the model input only, not to the graph state.
        if not messages or not isinstance(messages[0], SystemMessage):
            messages = [sys_msg] + list(messages)
        return {"messages": [llm_with_tools.invoke(messages)]}

    # Build graph
    builder = StateGraph(MessagesState)
    builder.add_node("assistant", assistant)
    builder.add_node("tools", ToolNode(tools))
    builder.add_edge(START, "assistant")
    builder.add_conditional_edges(
        "assistant",
        tools_condition,
    )
    builder.add_edge("tools", "assistant")
    # Compile graph
    return builder.compile()
# Manual smoke test: run one sample question through the agent and print
# every message of the resulting conversation.
if __name__ == "__main__":
    question = "When was a picture of St. Thomas Aquinas first added to the Wikipedia page on the Principle of double effect?"
    # Assemble the agent, then feed it the question as a single human message.
    graph = build_graph()
    final_state = graph.invoke({"messages": [HumanMessage(content=question)]})
    for message in final_state["messages"]:
        message.pretty_print()