import re

import pandas as pd
from langchain_community.callbacks import get_openai_callback
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI

from prompts.synthesis_agent import full_synthesis_template, sub_synthesis_template, sub_themes_template


class SynthesisAgent:
    """Build a markdown report for a query from per-theme article tables.

    For every theme the agent asks the LLM for a sub-synthesis and for a list
    of sub-themes, then asks for one overall synthesis of all sub-syntheses and
    stitches everything into a single markdown string.
    """

    # Case-insensitive marker separating the synthesis prose from its quote.
    _QUOTE_MARKER = re.compile(r"quote:", re.IGNORECASE)
    # Bracketed citation such as "[12]"; the digits are captured.
    _REFERENCE = re.compile(r"\[([0-9]{1,3})\]")

    def __init__(self, model: str = "gpt-4o", temperature: float = 0.7) -> None:
        """Create the prompt templates, the chat model and the three chains.

        Args:
            model: Model name passed to ``ChatOpenAI``.
            temperature: Sampling temperature passed to ``ChatOpenAI``.
        """
        self._full_synthesis_prompt = PromptTemplate(
            input_variables=["query", "syntheses"],
            template=full_synthesis_template,
        )
        self._sub_synthesis_prompt = PromptTemplate(
            input_variables=["query", "theme_info", "articles"],
            template=sub_synthesis_template,
        )
        self._sub_themes_prompts = PromptTemplate(
            input_variables=["theme_info", "articles"],
            template=sub_themes_template,
        )
        self._llm = ChatOpenAI(model=model, temperature=temperature)
        self._full_synthesis_chain = self._full_synthesis_prompt | self._llm
        self._sub_synthesis_chain = self._sub_synthesis_prompt | self._llm
        self._sub_themes_chain = self._sub_themes_prompts | self._llm

    def run(self, query: str, themes: list[dict[str, str]], sources: dict[str, pd.DataFrame]) -> tuple[str, dict[str, float]]:
        """Generate the full markdown report for ``query``.

        Args:
            query: The user question; used as the report heading and passed to
                the synthesis prompts.
            themes: Dicts with at least ``"title"`` and ``"description"`` keys.
            sources: Maps each theme title to a frame with ``title``,
                ``author``, ``summary``, ``quote`` and ``url`` columns.
                NOTE(review): ``row.name + 1`` is used as the citation number,
                so the index is presumably a 0-based integer range - confirm.

        Returns:
            ``(report, metrics)`` where ``metrics`` holds the summed
            ``"tokens"`` and ``"cost"`` of every LLM call made.
        """
        all_sub_syntheses: list[str] = []
        all_sub_themes: list[str] = []
        total_metrics: dict[str, float] = {"tokens": 0, "cost": 0.0}

        for theme in themes:
            theme_info = f"Theme Title: {theme['title']}\nTheme Description: {theme['description']}"
            articles = sources[theme["title"]].apply(self._format_article, axis=1).values.tolist()

            # The parsed quote is not part of the report, so it is discarded.
            synthesis, _, metrics = self._generate_sub_synthesis(query, theme_info, articles)
            all_sub_syntheses.append(synthesis)
            self._add_metrics(total_metrics, metrics)

            sub_themes, metrics = self._generate_sub_themes(theme_info, articles)
            all_sub_themes.append(sub_themes)
            self._add_metrics(total_metrics, metrics)

        full_synthesis, metrics = self._generate_full_synthesis(query, all_sub_syntheses)
        self._add_metrics(total_metrics, metrics)

        references = {
            theme["title"]: sources[theme["title"]].apply(self._format_reference, axis=1).values.tolist()
            for theme in themes
        }
        result = self._compose_result(query, full_synthesis, themes, references, all_sub_themes, all_sub_syntheses)
        return result, total_metrics

    @staticmethod
    def _format_article(row: pd.Series) -> str:
        """Render one source row as the numbered article text fed to the LLM."""
        return f"[{row.name + 1}]\nTitle: {row.title}\nAuthor: {row.author}\nSummary: {row.summary}\nQuote: {row.quote}"

    @staticmethod
    def _format_reference(row: pd.Series) -> str:
        """Render one source row as a numbered markdown link."""
        return f"[{row.name + 1}] [{row.title}]({row.url})"

    @staticmethod
    def _add_metrics(total: dict[str, float], metrics: dict[str, float]) -> None:
        """Add one call's token and cost figures onto the running totals."""
        total["tokens"] += metrics["tokens"]
        total["cost"] += metrics["cost"]

    def _compose_result(
        self,
        query: str,
        full_synthesis: str,
        themes: list[dict[str, str]],
        references: dict[str, list[str]],
        all_sub_themes: list[str],
        all_sub_syntheses: list[str],
    ) -> str:
        """Assemble the markdown report.

        Args:
            query: Rendered as the top ``###`` heading.
            full_synthesis: Overall synthesis placed under the heading.
            themes: Theme dicts, in the same order as the two ``all_*`` lists.
            references: Maps a theme title to its formatted reference lines.
            all_sub_themes: Raw sub-theme LLM output per theme. Only the text
                after the first ``:`` of each line containing one is used;
                those values are consumed in groups of three.
            all_sub_syntheses: Sub-synthesis text per theme.

        Returns:
            The complete markdown document.
        """
        parts = [f"### {query}\n{full_synthesis}\n", "\n\n"]

        for theme_idx, theme in enumerate(themes):
            parts.append(f"#### {theme['title']}\n{theme['description']}\n\n{all_sub_syntheses[theme_idx]}\n\n")

            fields = [
                line.partition(":")[2].strip()
                for line in all_sub_themes[theme_idx].strip().split("\n")
                if ":" in line
            ]
            # Stop early enough that every group is complete; a trailing
            # partial group is dropped rather than rendered half-filled.
            for start in range(0, len(fields) - 2, 3):
                heading, first_paragraph, second_paragraph = fields[start : start + 3]
                parts.append(f"###### {heading}\n{first_paragraph}\n\n{second_paragraph}\n\n")

            parts.append("###### References:\n" + "\n".join(references[theme["title"]]) + "\n\n\n")

        return "".join(parts)

    def _invoke(self, chain, inputs: dict[str, str]) -> tuple[str, dict[str, float]]:
        """Run ``chain`` on ``inputs`` and report its token usage and cost.

        Args:
            chain: One of the prompt-plus-model chains built in ``__init__``.
            inputs: Values for the prompt's input variables.

        Returns:
            ``(text, metrics)`` with the stripped response content and a dict
            holding the ``"tokens"`` and ``"cost"`` recorded by the callback.
        """
        with get_openai_callback() as cb:
            overview = chain.invoke(inputs).content.strip()
            metrics = {"tokens": cb.total_tokens, "cost": cb.total_cost}
        print(overview)
        return overview, metrics

    @classmethod
    def _extract_quote(cls, text: str) -> dict[str, str | int]:
        """Return the first double-quoted passage and first citation in ``text``.

        Returns an empty dict unless both a ``"`` and a ``[n]`` citation are
        present. ``"reference"`` is the citation number minus one.
        """
        pieces = text.split('"')
        citation = cls._REFERENCE.search(text)
        if len(pieces) < 2 or citation is None:
            return {}
        return {"content": pieces[1], "reference": int(citation.group(1)) - 1}

    def _generate_sub_synthesis(self, query: str, theme_info: str, articles: list[str]) -> tuple[str, dict[str, str | int], dict[str, float]]:
        """Ask the LLM for one theme's synthesis and split off its quote.

        The response is cut at the first ``quote:`` marker (any casing) or,
        failing that, at the first blank line. A response with neither is
        treated as synthesis only instead of raising.

        Args:
            query: The user question.
            theme_info: Formatted theme title and description.
            articles: Formatted article texts, joined with newlines.

        Returns:
            ``(synthesis, quote, metrics)``. ``quote`` is either empty or has
            ``"content"`` and ``"reference"`` keys; it is empty when no quoted
            passage with a citation can be found.
        """
        overview, metrics = self._invoke(
            self._sub_synthesis_chain,
            {"query": query, "theme_info": theme_info, "articles": "\n".join(articles)},
        )

        # Search the original string so the offsets are valid for slicing it.
        marker = self._QUOTE_MARKER.search(overview)
        if marker is not None:
            synthesis = overview[: marker.start()].strip()
            quote_section = overview[marker.end() :].strip()
        else:
            head, _, tail = overview.partition("\n\n")
            synthesis = head.strip()
            quote_section = tail.strip()

        # Prefer the quote section so a citation inside the synthesis prose is
        # not mistaken for the quote's source; fall back to the whole response.
        quote = self._extract_quote(quote_section) or self._extract_quote(overview)
        return synthesis, quote, metrics

    def _generate_sub_themes(self, theme_info: str, articles: list[str]) -> tuple[str, dict[str, float]]:
        """Ask the LLM for the sub-themes of one theme.

        Returns:
            ``(raw_response, metrics)``.
        """
        return self._invoke(
            self._sub_themes_chain,
            {"theme_info": theme_info, "articles": "\n".join(articles)},
        )

    def _generate_full_synthesis(self, query: str, syntheses: list[str]) -> tuple[str, dict[str, float]]:
        """Ask the LLM for one synthesis covering all per-theme syntheses.

        Returns:
            ``(raw_response, metrics)``.
        """
        return self._invoke(
            self._full_synthesis_chain,
            {"query": query, "syntheses": "\n".join(syntheses)},
        )