import os from dotenv import load_dotenv from typing import List from typing_extensions import TypedDict from pydantic import BaseModel, Field from langgraph.graph import START, END, StateGraph from langgraph.checkpoint.memory import MemorySaver from langchain_core.messages import AIMessage, HumanMessage, SystemMessage import operator from typing import Annotated from langgraph.graph import MessagesState from langchain_openai import ChatOpenAI import gradio as gr import uuid from gradio.themes.utils import colors from gradio.themes import Base load_dotenv() os.environ["OPENAI_API_KEY"] = os.getenv('OPENAI_API_KEY') os.environ["TAVILY_API_KEY"] = os.getenv('TAVILY_API_KEY') os.environ["LANGCHAIN_TRACING_V2"] = "true" os.environ["LANGCHAIN_API_KEY"] = os.getenv('LANGCHAIN_API_KEY') os.environ["LANGCHAIN_PROJECT"] = "Research Assistant v1" llm = ChatOpenAI(model="gpt-4o", temperature=0) from typing import List from typing_extensions import TypedDict from pydantic import BaseModel, Field class Analyst(BaseModel): affiliation: str = Field( description="Primary affiliation of the analyst.", ) name: str = Field( description="Name of the analyst." ) role: str = Field( description="Role of the analyst in the context of the topic.", ) description: str = Field( description="Description of the analyst focus, concerns, and motives.", ) @property def persona(self) -> str: return f"Name: {self.name}\nRole: {self.role}\nAffiliation: {self.affiliation}\nDescription: {self.description}\n" class Perspectives(BaseModel): analysts: List[Analyst] = Field( description="Comprehensive list of analysts with their roles and affiliations.", ) class GenerateAnalystsState(TypedDict): topic: str # Research topic max_analysts: int # Number of analysts human_analyst_feedback: str # Human feedback analysts: List[Analyst] # Analyst asking questions from langgraph.graph import START, END, StateGraph from langgraph.checkpoint.memory import MemorySaver from langchain_core.messages import AIMessage, HumanMessage, SystemMessage analyst_instructions="""You are tasked with creating a set of AI analyst personas. Follow these instructions carefully: 1. First, review the research topic: {topic} 2. Examine any editorial feedback that has been optionally provided to guide creation of the analysts: {human_analyst_feedback} 3. Determine the most interesting themes based upon documents and / or feedback above. 4. Pick the top {max_analysts} themes. 5. Assign one analyst to each theme.""" def create_analysts(state: GenerateAnalystsState): """ Create analysts """ topic=state['topic'] max_analysts=state['max_analysts'] human_analyst_feedback=state.get('human_analyst_feedback', '') # Enforce structured output structured_llm = llm.with_structured_output(Perspectives) # System message system_message = analyst_instructions.format(topic=topic, human_analyst_feedback=human_analyst_feedback, max_analysts=max_analysts) # Generate question analysts = structured_llm.invoke([SystemMessage(content=system_message)]+[HumanMessage(content="Generate the set of analysts.")]) # Write the list of analysis to state return {"analysts": analysts.analysts} def human_feedback(state: GenerateAnalystsState): """ No-op node that should be interrupted on """ pass def should_continue(state: GenerateAnalystsState): """ Return the next node to execute """ # Check if human feedback human_analyst_feedback=state.get('human_analyst_feedback', None) if human_analyst_feedback: return "create_analysts" # Otherwise end return END import operator from typing import Annotated from langgraph.graph import MessagesState class InterviewState(MessagesState): max_num_turns: int # Number turns of conversation context: Annotated[list, operator.add] # Source docs analyst: Analyst # Analyst asking questions interview: str # Interview transcript sections: list # Final key we duplicate in outer state for Send() API class SearchQuery(BaseModel): search_query: str = Field(None, description="Search query for retrieval.") question_instructions = """You are an analyst tasked with interviewing an expert to learn about a specific topic. Your goal is boil down to interesting and specific insights related to your topic. 1. Interesting: Insights that people will find surprising or non-obvious. 2. Specific: Insights that avoid generalities and include specific examples from the expert. Here is your topic of focus and set of goals: {goals} Begin by introducing yourself using a name that fits your persona, and then ask your question. Continue to ask questions to drill down and refine your understanding of the topic. When you are satisfied with your understanding, complete the interview with: "Thank you so much for your help!" Remember to stay in character throughout your response, reflecting the persona and goals provided to you.""" def generate_question(state: InterviewState): """ Node to generate a question """ # Get state analyst = state["analyst"] messages = state["messages"] # Generate question system_message = question_instructions.format(goals=analyst.persona) question = llm.invoke([SystemMessage(content=system_message)]+messages) # Write messages to state return {"messages": [question]} from langchain_community.document_loaders import WikipediaLoader from langchain_community.tools.tavily_search import TavilySearchResults from langchain_core.messages import get_buffer_string from langchain_core.documents import Document import arxiv # Search query writing search_instructions = SystemMessage(content=f"""You will be given a conversation between an analyst and an expert. Your goal is to generate a well-structured query for use in retrieval and / or web-search related to the conversation. First, analyze the full conversation. Pay particular attention to the final question posed by the analyst. Convert this final question into a well-structured web search query""") def search_web(state: InterviewState): """ Retrieve docs from web search """ # Search query structured_llm = llm.with_structured_output(SearchQuery) search_query = structured_llm.invoke([search_instructions]+state['messages']) # Search tavily_search = TavilySearchResults(max_results=5) search_docs = tavily_search.invoke(search_query.search_query) # Debug: Print the type and content of search_docs print(f"Type of search_docs: {type(search_docs)}") print(f"Content of search_docs: {search_docs}") # Format try: formatted_search_docs = "\n\n---\n\n".join( [ f'\n{doc["content"]}\n' for doc in search_docs ] ) except TypeError as e: print(f"Error in formatting search_docs: {e}") # Fallback: treat search_docs as a single string if it's not iterable formatted_search_docs = f"\n{search_docs}\n" return {"context": [formatted_search_docs]} def search_wikipedia(state: InterviewState): """ Retrieve docs from wikipedia """ # Search query structured_llm = llm.with_structured_output(SearchQuery) search_query = structured_llm.invoke([search_instructions]+state['messages']) # Search search_docs = WikipediaLoader(query=search_query.search_query, load_max_docs=2).load() # Format formatted_search_docs = "\n\n---\n\n".join( [ f'\n{doc.page_content}\n' for doc in search_docs ] ) return {"context": [formatted_search_docs]} def search_arxiv(state: InterviewState): """ Retrieve docs from arxiv """ # Search query structured_llm = llm.with_structured_output(SearchQuery) search_query = structured_llm.invoke([search_instructions]+state['messages']) # Search arXiv search = arxiv.Search( query=search_query.search_query, max_results=10, sort_by=arxiv.SortCriterion.Relevance ) # Retrieve results search_docs = [] for result in search.results(): doc = Document( page_content=f"{result.title}\n\n{result.summary}", metadata={ "title": result.title, "authors": ", ".join(author.name for author in result.authors), "published": result.published.strftime("%Y-%m-%d"), "url": result.entry_id, } ) search_docs.append(doc) # Format formatted_search_docs = "\n\n---\n\n".join( [ f'\n{doc.page_content}\n' for doc in search_docs ] ) return {"context": [formatted_search_docs]} answer_instructions = """You are an expert being interviewed by an analyst. Here is analyst area of focus: {goals}. You goal is to answer a question posed by the interviewer. To answer question, use this context: {context} When answering questions, follow these guidelines: 1. Use only the information provided in the context. 2. Do not introduce external information or make assumptions beyond what is explicitly stated in the context. 3. The context contain sources at the topic of each individual document. 4. Include these sources your answer next to any relevant statements. For example, for source # 1 use [1]. 5. List your sources in order at the bottom of your answer. [1] Source 1, [2] Source 2, etc 6. If the source is: ' then just list: [1] assistant/docs/llama3_1.pdf, page 7 And skip the addition of the brackets as well as the Document source preamble in your citation.""" def generate_answer(state: InterviewState): """ Node to answer a question """ # Get state analyst = state["analyst"] messages = state["messages"] context = state["context"] # Answer question system_message = answer_instructions.format(goals=analyst.persona, context=context) answer = llm.invoke([SystemMessage(content=system_message)]+messages) # Name the message as coming from the expert answer.name = "expert" # Append it to state return {"messages": [answer]} def save_interview(state: InterviewState): """ Save interviews """ # Get messages messages = state["messages"] # Convert interview to a string interview = get_buffer_string(messages) # Save to interviews key return {"interview": interview} def route_messages(state: InterviewState, name: str = "expert"): """ Route between question and answer """ # Get messages messages = state["messages"] max_num_turns = state.get('max_num_turns',2) # Check the number of expert answers num_responses = len( [m for m in messages if isinstance(m, AIMessage) and m.name == name] ) # End if expert has answered more than the max turns if num_responses >= max_num_turns: return 'save_interview' # This router is run after each question - answer pair # Get the last question asked to check if it signals the end of discussion last_question = messages[-2] if "Thank you so much for your help" in last_question.content: return 'save_interview' return "ask_question" section_writer_instructions = """You are an expert technical writer. Your task is to create a short, easily digestible section of a report based on a set of source documents. 1. Analyze the content of the source documents: - The name of each source document is at the start of the document, with the