"""Exam question bank, loaded from a HuggingFace dataset with a built-in fallback.

Importing this module populates ``EXAM_QUESTIONS``: a dict mapping a sanitized
theme name to the list of question dicts for that theme.
"""
import re
from typing import Any, Dict, List

try:
    from datasets import load_dataset
except ImportError:
    # Keep the module importable without the optional dependency so that the
    # fallback question bank at the bottom of the file can still be used.
    load_dataset = None


def sanitize_theme_name(theme: str) -> str:
    """Convert a theme name into an identifier-safe string.

    Every character outside ``[a-zA-Z0-9_]`` is replaced with an underscore,
    and the result is prefixed with ``theme_`` when it does not start with a
    letter (this includes the empty string, which yields ``"theme_"``).

    Note that distinct themes can map to the same result
    (e.g. ``"a-b"`` and ``"a b"`` both become ``"a_b"``).
    """
    # Replace non-alphanumeric chars with underscore
    sanitized = re.sub(r'[^a-zA-Z0-9_]', '_', theme)
    # Ensure it starts with a letter; the emptiness check keeps "" from
    # raising IndexError on sanitized[0].
    if not sanitized or not sanitized[0].isalpha():
        sanitized = 'theme_' + sanitized
    return sanitized


def load_questions_from_dataset() -> Dict[str, List[Dict[str, Any]]]:
    """Load and format questions from the HuggingFace dataset.

    Rows are read from the ``train`` split and grouped by sanitized theme
    name. A row is skipped (and counted as skipped) when it has no correct
    answer, fewer than three answers, no ``theme``/``statement`` key, or a
    theme that is not a string. Rows with exactly three answers are padded to
    four by repeating the last answer; only the first four answers are used.

    Returns:
        A dict mapping sanitized theme name to a list of question dicts with
        the keys ``statement``, ``options`` (``A``-``D``), ``real_answer``,
        ``theme``, ``sanitized_theme`` and ``version``. Themes without any
        valid question are not included.

    Raises:
        ImportError: if the ``datasets`` package is not installed.
        Exception: whatever ``load_dataset`` raises when the dataset cannot
            be fetched.
    """
    if load_dataset is None:
        raise ImportError(
            "the 'datasets' package is required to load questions from the dataset"
        )

    dataset = load_dataset("RafaelJaime/sas_opposition_exam_data")

    # Group questions by theme
    questions_by_theme: Dict[str, List[Dict[str, Any]]] = {}
    skipped = 0
    loaded = 0

    for item in dataset['train']:
        # Work on a copy so that padding below never mutates the dataset row.
        answers = list(item.get('answers') or [])
        correct_answer = item.get('correct_answer', '')

        # Skip invalid questions
        if not correct_answer or len(answers) < 3:
            skipped += 1
            continue

        try:
            theme = item['theme']
            statement = item['statement']
        except KeyError:
            # One malformed row must not abort the whole load.
            skipped += 1
            continue

        if not isinstance(theme, str):
            # sanitize_theme_name needs a string; treat anything else as invalid.
            skipped += 1
            continue

        # Ensure we have at least 4 options by repeating the last one
        answers.extend([answers[-1]] * (4 - len(answers)))

        # Get sanitized theme name for use as identifier
        sanitized_theme = sanitize_theme_name(theme)

        # Format question in our standard format
        question = {
            "statement": statement,
            "options": {
                "A": answers[0],
                "B": answers[1],
                "C": answers[2],
                "D": answers[3],
            },
            "real_answer": correct_answer,
            "theme": theme,  # Keep original theme name for display
            "sanitized_theme": sanitized_theme,  # Add sanitized name for internal use
            "version": item.get('version', 'Default'),
        }

        # Register the theme only once a valid question exists for it, so no
        # theme ends up with an empty question list.
        questions_by_theme.setdefault(sanitized_theme, []).append(question)
        loaded += 1

    print(f"Loaded {loaded} questions, skipped {skipped} invalid questions")
    return questions_by_theme


# Load questions from dataset
try:
    EXAM_QUESTIONS = load_questions_from_dataset()
    print(f"Successfully loaded questions for {len(EXAM_QUESTIONS)} themes")
except Exception as e:  # any failure here must degrade to the fallback below
    print(f"Error loading questions from dataset: {e}")
    # Fallback questions in case dataset is not accessible
    EXAM_QUESTIONS = {
        "Administrativo_a": [
            {
                "statement": "El derecho a la protección de la salud se reconoce por la Constitución Española en el:",
                "options": {
                    "A": "Artículo 44.",
                    "B": "Artículo 46.",
                    "C": "Artículo 43.",
                    "D": "Artículo 41."
                },
                "real_answer": "C",
                "theme": "Administrativo-a",
                "sanitized_theme": "Administrativo_a",
                "version": "Default"
            }
        ]
    }