import logging
from datetime import datetime

from scraper import BloomingtonScraper
from processor import DataProcessor
from generator import QAPairGenerator
from config import LOG_DIR


def setup_logging() -> None:
    """Configure root logging to a timestamped file in LOG_DIR and the console.

    Records at INFO level and above are written both to
    ``LOG_DIR/main_<YYYYmmdd_HHMMSS>.log`` and to a ``StreamHandler``.
    The log directory is created if it does not exist yet, so a first run
    on a clean checkout does not fail when the file handler opens its file.
    """
    log_file = LOG_DIR / f"main_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"

    # FileHandler opens the file immediately and raises FileNotFoundError if
    # the parent directory is missing.
    # NOTE(review): assumes LOG_DIR is a pathlib.Path (it is joined with `/`
    # above) -- confirm against config.
    log_file.parent.mkdir(parents=True, exist_ok=True)

    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[
            logging.FileHandler(log_file),
            logging.StreamHandler(),  # Also print to console
        ],
    )


def main() -> None:
    """Run the pipeline: data collection, data processing, QA pair generation.

    The three stages run strictly in sequence. Any exception raised by a
    stage is logged with its traceback and then re-raised, so the process
    still terminates with an error.

    Raises:
        Exception: whatever a pipeline stage raised, propagated unchanged.
    """
    setup_logging()
    logging.info("Starting Bloomington Tourist Guide data collection and QA pair generation")

    try:
        # Step 1: Data Collection
        logging.info("Starting data collection...")
        scraper = BloomingtonScraper()
        scraper.scrape_all_categories()
        search_stats = scraper.get_search_stats()
        logging.info("Data collection completed. Search stats: %s", search_stats)

        # Step 2: Data Processing
        logging.info("Starting data processing...")
        processor = DataProcessor()
        processor.process_all_categories()
        logging.info("Data processing completed")

        # Step 3: QA Pair Generation
        logging.info("Starting QA pair generation...")
        generator = QAPairGenerator()
        generator.generate_all_pairs()
        logging.info("QA pair generation completed")
    except Exception as e:
        # Top-level boundary: record the traceback, then let the error
        # propagate so the failure is not hidden from the caller.
        logging.exception("Error in main execution: %s", e)
        raise
    else:
        logging.info("Pipeline completed successfully")


if __name__ == "__main__":
    main()