File size: 9,725 Bytes
d09f6aa 100024e d09f6aa 100024e 0333a17 d09f6aa 313f83b d09f6aa 313f83b 56fd459 d09f6aa 9b095d8 d09f6aa 9b095d8 d09f6aa 9b095d8 d09f6aa 0333a17 07fe6c3 2ec553e d09f6aa 07fe6c3 0333a17 6604cbf d09f6aa 2ec553e d09f6aa 2ec553e 313f83b 2ec553e 313f83b 2ec553e 6604cbf 2ec553e d09f6aa 100024e 0333a17 d09f6aa 100024e d09f6aa 100024e d09f6aa 100024e 07fe6c3 100024e d09f6aa 100024e d09f6aa 100024e d09f6aa 100024e 313f83b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 |
# Module for core card generation logic
import gradio as gr
import pandas as pd
from typing import List, Dict, Any
# Imports from our core modules
from ankigen_core.utils import (
get_logger,
ResponseCache,
strip_html_tags,
)
from ankigen_core.llm_interface import OpenAIClientManager
from ankigen_core.models import (
Card,
) # Import necessary Pydantic models
# Import agent system - required
from ankigen_core.agents.integration import AgentOrchestrator
from agents import set_tracing_disabled
# Shared module-level logger for all card-generation functions below.
logger = get_logger()
# Disable tracing to prevent metrics persistence issues
set_tracing_disabled(True)
# Checked by orchestrate_card_generation before running the agent pipeline.
# The agent imports above are hard requirements, so when this module loads
# successfully this flag is always True.
AGENTS_AVAILABLE = True
logger.info("Agent system loaded successfully")
# --- Constants --- (Moved from app.py)
# Model choices surfaced in the UI dropdown: "value" is the OpenAI model id
# passed to the agents, "label"/"description" are display-only strings.
AVAILABLE_MODELS = [
    {
        "value": "gpt-5.1",
        "label": "GPT-5.1 (Best Quality)",
        "description": "Latest model with adaptive reasoning, 400K context",
    },
    {
        "value": "gpt-4.1",
        "label": "GPT-4.1 (Legacy)",
        "description": "Previous generation, large context window",
    },
    {
        "value": "gpt-4.1-nano",
        "label": "GPT-4.1 Nano (Legacy Fast)",
        "description": "Previous generation, ultra-fast",
    },
]
# Generation modes offered by the UI. Only "subject" is listed here, but
# _map_generation_mode_to_subject also understands "path" and "text".
GENERATION_MODES = [
    {
        "value": "subject",
        "label": "Single Subject",
        "description": "Generate cards for a specific topic",
    },
]
# --- Core Functions --- (Moved and adapted from app.py)
# Legacy functions removed - all card generation now handled by agent system
def _map_generation_mode_to_subject(generation_mode: str, subject: str) -> str:
"""Map UI generation mode to agent subject."""
if generation_mode == "subject":
return subject if subject else "general"
elif generation_mode == "path":
return "curriculum_design"
elif generation_mode == "text":
return "content_analysis"
return "general"
def _build_generation_context(generation_mode: str, source_text: str) -> Dict[str, Any]:
"""Build context dict for card generation."""
context: Dict[str, Any] = {}
if generation_mode == "text" and source_text:
context["source_text"] = source_text
return context
def _get_token_usage_html(token_tracker) -> str:
"""Extract token usage and format as HTML."""
try:
if hasattr(token_tracker, "get_session_summary"):
token_usage = token_tracker.get_session_summary()
elif hasattr(token_tracker, "get_session_usage"):
token_usage = token_tracker.get_session_usage()
else:
raise AttributeError("TokenTracker has no session summary method")
return f"<div style='margin-top: 8px;'><b>Token Usage:</b> {token_usage['total_tokens']} tokens</div>"
except Exception as e:
logger.error(f"Token usage collection failed: {e}")
return "<div style='margin-top: 8px;'><b>Token Usage:</b> No usage data</div>"
def _format_cards_to_dataframe(
    agent_cards: List[Card], subject: str
) -> tuple[pd.DataFrame, str]:
    """Convert agent-produced cards into the UI DataFrame plus a count banner."""
    # Empty subject is labelled "General" in the Topic column.
    topic_label = subject or "General"
    rows = format_cards_for_dataframe(
        agent_cards,
        topic_name=topic_label,
        start_index=1,
    )
    output_df = pd.DataFrame(rows, columns=get_dataframe_columns())
    count_html = f"<div><b>Cards Generated:</b> <span id='total-cards-count'>{len(output_df)}</span></div>"
    return output_df, count_html
async def orchestrate_card_generation(
    client_manager: OpenAIClientManager,
    cache: ResponseCache,
    api_key_input: str,
    subject: str,
    generation_mode: str,
    source_text: str,
    url_input: str,
    model_name: str,
    topic_number: int,
    cards_per_topic: int,
    preference_prompt: str,
    generate_cloze: bool,
    use_llm_judge: bool = False,
    library_name: str = None,  # NOTE(review): default None despite `str` annotation — consider Optional[str]
    library_topic: str = None,  # NOTE(review): same — consider Optional[str]
    topics_list: List[str] = None,  # NOTE(review): same — consider Optional[List[str]]
):
    """Orchestrates the card generation process based on UI inputs.

    Runs the agent pipeline end to end: initializes the orchestrator with the
    given API key and model, maps the UI mode into agent inputs, requests
    topic_number * cards_per_topic cards, and formats the result for display.

    Args:
        client_manager: Supplies the OpenAI client to the AgentOrchestrator.
        cache: Response cache — not used in this code path; kept for
            interface compatibility with the UI wiring.
        api_key_input: OpenAI API key used to initialize the orchestrator.
        subject: Topic entered in the UI; empty values fall back to generic
            labels ("Mixed Topics" / "general").
        generation_mode: One of "subject", "path", or "text"
            (see _map_generation_mode_to_subject).
        source_text: Source material; only folded into the context when
            generation_mode == "text".
        url_input: URL from the UI — not used in this code path.
        model_name: OpenAI model id assigned to the SubjectExpertAgent.
        topic_number: Number of topics requested.
        cards_per_topic: Cards per topic; total requested is the product.
        preference_prompt: User style preferences — not used in this code path.
        generate_cloze: Whether cloze-style cards should be generated.
        use_llm_judge: Not used in this code path; kept for compatibility.
        library_name: Optional library to associate generated cards with.
        library_topic: Optional topic within that library.
        topics_list: Optional explicit list of topics to cover.

    Returns:
        Tuple of (cards DataFrame, status/count HTML, token-usage HTML).
        On any failure the DataFrame is empty and the token-usage string is "".
    """
    logger.info(f"Starting card generation orchestration in {generation_mode} mode")
    logger.debug(
        f"Parameters: mode={generation_mode}, topics={topic_number}, "
        f"cards_per_topic={cards_per_topic}, cloze={generate_cloze}"
    )
    # Defensive guard: AGENTS_AVAILABLE is set True at module import time, so
    # this branch is effectively unreachable unless the flag is changed later.
    if not AGENTS_AVAILABLE:
        logger.error("Agent system is required but not available")
        gr.Error("Agent system is required but not available")
        return pd.DataFrame(columns=get_dataframe_columns()), "Agent system error", ""
    try:
        # Imported lazily so the module does not hard-depend on the tracker
        # at import time.
        from ankigen_core.agents.token_tracker import get_token_tracker

        token_tracker = get_token_tracker()
        orchestrator = AgentOrchestrator(client_manager)
        logger.info(f"Using {model_name} for SubjectExpertAgent")
        await orchestrator.initialize(api_key_input, {"subject_expert": model_name})
        agent_subject = _map_generation_mode_to_subject(generation_mode, subject)
        context = _build_generation_context(generation_mode, source_text)
        total_cards_needed = topic_number * cards_per_topic
        # agent_metadata is currently unused; only the cards are consumed.
        agent_cards, agent_metadata = await orchestrator.generate_cards_with_agents(
            topic=subject if subject else "Mixed Topics",
            subject=agent_subject,
            num_cards=total_cards_needed,
            difficulty="intermediate",
            context=context,
            library_name=library_name,
            library_topic=library_topic,
            generate_cloze=generate_cloze,
            topics_list=topics_list,
            cards_per_topic=cards_per_topic,
        )
        # Token usage is reported even when generation yields no cards.
        token_usage_html = _get_token_usage_html(token_tracker)
        if agent_cards:
            output_df, total_cards_message = _format_cards_to_dataframe(
                agent_cards, subject
            )
            logger.info(f"Agent system generated {len(output_df)} cards successfully")
            return output_df, total_cards_message, token_usage_html
        logger.error("Agent system returned no cards")
        gr.Error("Agent system returned no cards")
        return (
            pd.DataFrame(columns=get_dataframe_columns()),
            "Agent system returned no cards.",
            "",
        )
    except Exception as e:
        # Surface the failure in the UI and return an empty result set.
        logger.error(f"Agent system failed: {e}")
        gr.Error(f"Agent system error: {str(e)}")
        return (
            pd.DataFrame(columns=get_dataframe_columns()),
            f"Agent system error: {str(e)}",
            "",
        )
# Legacy helper functions removed - all processing now handled by agent system
# --- Formatting and Utility Functions --- (Moved and adapted)
def format_cards_for_dataframe(
    cards: list[Card], topic_name: str, topic_index: int = 0, start_index: int = 1
) -> list:
    """Formats a list of Card objects into a list of dictionaries for DataFrame display.

    Ensures all data is plain text.

    Args:
        cards: Card objects produced by the agent system.
        topic_name: Display name for the "Topic" column (HTML is stripped).
        topic_index: When > 0, indices render as "topic.card" (e.g. "2.1");
            otherwise the plain card number is used.
        start_index: Number assigned to the first card.

    Returns:
        One dict per card, keyed by the columns from get_dataframe_columns(),
        with every value reduced to plain text.
    """
    formatted_cards = []
    for i, card_obj in enumerate(cards):
        actual_index = start_index + i
        card_type = card_obj.card_type or "basic"
        question = card_obj.front.question or ""
        answer = card_obj.back.answer or ""
        explanation = card_obj.back.explanation or ""
        example = card_obj.back.example or ""

        # Metadata is free-form (LLM-produced), so be defensive about types.
        metadata = card_obj.metadata or {}
        prerequisites = metadata.get("prerequisites", [])
        learning_outcomes = metadata.get("learning_outcomes", [])
        difficulty = metadata.get("difficulty", "N/A")

        # Join list-based metadata into plain strings for the DataFrame.
        # Each element is coerced with str() so a non-string item (e.g. an
        # int emitted by the model) cannot raise TypeError from str.join.
        prerequisites_str = strip_html_tags(
            ", ".join(str(p) for p in prerequisites)
            if isinstance(prerequisites, list)
            else str(prerequisites)
        )
        learning_outcomes_str = strip_html_tags(
            ", ".join(str(o) for o in learning_outcomes)
            if isinstance(learning_outcomes, list)
            else str(learning_outcomes)
        )
        difficulty_str = strip_html_tags(str(difficulty))

        formatted_card = {
            "Index": (
                f"{topic_index}.{actual_index}"
                if topic_index > 0
                else str(actual_index)
            ),
            "Topic": strip_html_tags(topic_name),  # Ensure topic is also plain
            "Card_Type": strip_html_tags(card_type),
            # Question/answer fields were already stripped during Card creation.
            "Question": question,
            "Answer": answer,
            "Explanation": explanation,
            "Example": example,
            "Prerequisites": prerequisites_str,
            "Learning_Outcomes": learning_outcomes_str,
            "Difficulty": difficulty_str,
            "Source_URL": strip_html_tags(
                metadata.get("source_url", "")
            ),  # Ensure Source_URL is plain
        }
        formatted_cards.append(formatted_card)
    return formatted_cards
def get_dataframe_columns() -> list[str]:
    """Returns the standard list of columns for the Anki card DataFrame."""
    # Single source of truth for column order, split on whitespace.
    column_spec = (
        "Index Topic Card_Type Question Answer Explanation Example "
        "Prerequisites Learning_Outcomes Difficulty Source_URL"
    )
    return column_spec.split()
def generate_token_usage_html(token_usage=None):
    """Generate HTML for token usage display"""
    # Guard clause: anything that is not a non-empty dict yields the fallback.
    if not (token_usage and isinstance(token_usage, dict)):
        return "<div style='margin-top: 8px;'><b>Token Usage:</b> No usage data</div>"
    total_tokens = token_usage.get("total_tokens", 0)
    return f"<div style='margin-top: 8px;'><b>Token Usage:</b> {total_tokens} tokens</div>"
|