import concurrent.futures

import pandas as pd
from langchain_community.callbacks import get_openai_callback
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI

from prompts.thinking_ladder_agent import template


class ThinkingLadderAgent:
    """Classifies each theme's articles with an LLM and keeps only the most relevant ones."""

    def __init__(self, model: str = "gpt-4o-mini", temperature: float = 0) -> None:
        self._prompt = PromptTemplate(input_variables=["query", "article"], template=template)
        self._llm = ChatOpenAI(model=model, temperature=temperature)
        self._chain = self._prompt | self._llm

    def run(
        self,
        themes: list[dict[str, str]],
        sources: dict[str, dict[str, pd.DataFrame]],
        limit: int = 10,
    ) -> tuple[dict[str, pd.DataFrame], dict[str, float]]:
        """Classify the articles gathered for each theme and aggregate token/cost metrics."""
        results = {}
        total_metrics = {"tokens": 0, "cost": 0}
        for theme in themes:
            # Combine all source DataFrames collected for this theme into a single frame.
            articles_df = pd.concat(sources[theme["title"]].values())
            results[theme["title"]], metrics = self._process_theme(theme["title"], articles_df, limit)
            total_metrics["tokens"] += metrics["tokens"]
            total_metrics["cost"] += metrics["cost"]
        return results, total_metrics

    def _process_theme(
        self, theme_title: str, articles_df: pd.DataFrame, limit: int = 10
    ) -> tuple[pd.DataFrame, dict[str, float]]:
        """Classify every article for a theme in parallel and keep the top `limit` high-relevance ones."""
        articles = articles_df.apply(
            lambda row: f"Title: {row['title']}\nContent: {row['content']}", axis=1
        ).tolist()
        classifications = []
        total_metrics = {"tokens": 0, "cost": 0}
        with concurrent.futures.ThreadPoolExecutor() as executor:
            futures = [executor.submit(self._classify, theme_title, article) for article in articles]
            # Collect results in submission order so each classification stays aligned
            # with its article row when the two frames are concatenated below.
            for future in futures:
                classification, metrics = future.result()
                classifications.append(classification)
                total_metrics["tokens"] += metrics["tokens"]
                total_metrics["cost"] += metrics["cost"]
        df1 = articles_df.reset_index(drop=True)
        df2 = pd.DataFrame(classifications).reset_index(drop=True)
        df = pd.concat([df1, df2], axis=1)
        # Keep only articles classified as highly relevant, then cap at `limit`.
        df = df[df["class"] == "high"].drop(columns=["class"])
        df = df.head(limit)
        return df, total_metrics

    def _classify(self, query: str, article: str) -> tuple[dict[str, str], dict[str, float]]:
        """Ask the LLM to rate one article against the theme and parse its multi-line answer."""
        with get_openai_callback() as cb:
            classification = self._chain.invoke({"query": query, "article": article}).content.strip()
            tokens = cb.total_tokens
            cost = cb.total_cost
        # The prompt is expected to return a verdict line followed by reason, summary, and quote lines.
        classification = [line for line in classification.split("\n") if line]
        score = "high" if "high" in classification[0].lower() else "low"
        keys = ["reason", "summary", "quote"]
        classification = {
            keys[i]: item[item.index(":") + 1 :].strip().replace('"', "")
            for i, item in enumerate(classification[1:])
        }
        classification["class"] = score
        return classification, {"tokens": tokens, "cost": cost}
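

# --- Illustrative usage sketch (not part of the original module) ---
# The shapes below are inferred from how `run` consumes its arguments: `themes` is a
# list of dicts with a "title" key, and `sources` maps each theme title to a dict of
# DataFrames that all carry "title" and "content" columns. The sample data is
# hypothetical, and running this requires valid OpenAI credentials plus the
# `prompts.thinking_ladder_agent` template imported above.
if __name__ == "__main__":
    sample_articles = pd.DataFrame(
        {
            "title": ["Solid-state batteries explained", "A history of the penny"],
            "content": [
                "Solid-state batteries replace the liquid electrolyte with a solid...",
                "The penny was first minted in...",
            ],
        }
    )
    themes = [{"title": "battery technology"}]
    sources = {"battery technology": {"example_source": sample_articles}}

    agent = ThinkingLadderAgent()
    results, metrics = agent.run(themes, sources, limit=5)
    print(results["battery technology"])
    print(metrics)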