"""
Utility functions for the Iain Morris article generator project
"""

import json
import os
import logging
from typing import Dict, List, Optional
import requests
from datetime import datetime

logger = logging.getLogger(__name__)

def setup_logging(log_level: str = "INFO"):
    """
    Setup logging configuration

    Configures the root logger to write to ``morris_bot.log`` (relative to
    the current working directory) and to the console.

    Args:
        log_level: Logging level name (DEBUG, INFO, WARNING, ERROR, CRITICAL).
            Matching is case-insensitive and ignores surrounding whitespace.
            An unrecognised name falls back to INFO (with a warning) instead
            of raising.

    Note:
        ``logging.basicConfig`` leaves an already-configured root logger
        untouched, so only the first effective call changes anything.
    """
    level_name = str(log_level).strip().upper()
    level = getattr(logging, level_name, None)
    # getattr can also match non-level module attributes (BASIC_FORMAT is a
    # string, for example), so accept genuine integer level constants only.
    level_is_valid = isinstance(level, int) and not isinstance(level, bool)
    if not level_is_valid:
        level = logging.INFO

    logging.basicConfig(
        level=level,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        handlers=[
            # Explicit UTF-8 so non-ASCII log text (e.g. article titles) can
            # always be written, whatever the platform default encoding is.
            logging.FileHandler('morris_bot.log', encoding='utf-8'),
            logging.StreamHandler()
        ]
    )

    if not level_is_valid:
        # Reported after configuration so the message reaches the handlers.
        logging.getLogger(__name__).warning(
            "Unknown log level %r; falling back to INFO", log_level
        )

def ensure_directories():
    """Create each directory the project relies on, leaving existing ones untouched"""
    # Paths are relative to the current working directory; the nested adapter
    # directory is listed explicitly even though makedirs would create parents.
    for required_dir in ('data', 'models', 'models/lora_adapters', 'logs'):
        os.makedirs(required_dir, exist_ok=True)
        logger.info(f"Ensured directory exists: {required_dir}")

def load_json(filepath: str) -> Optional[Dict]:
    """
    Read and parse a UTF-8 encoded JSON file without raising

    Args:
        filepath: Location of the JSON file to read

    Returns:
        The parsed JSON content (whatever top-level value the file holds),
        or None when reading or parsing fails; the failure is logged as an
        error
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as source:
            parsed = json.loads(source.read())
    except Exception as err:
        # Best-effort loader: report the problem and signal failure with None.
        logger.error(f"Error loading JSON from {filepath}: {err}")
        return None
    return parsed

def save_json(data: Dict, filepath: str):
    """
    Save data to JSON file

    The output is UTF-8 encoded, indented by two spaces, and keeps non-ASCII
    characters unescaped. Missing parent directories are created. Failures
    are logged as errors rather than raised.

    Args:
        data: Data to save (must be JSON-serializable)
        filepath: Output file path; may be a bare filename, in which case the
            file is written to the current working directory
    """
    try:
        # Serialize before opening the file: opening with 'w' truncates it,
        # so a serialization error must not be able to destroy old content.
        payload = json.dumps(data, indent=2, ensure_ascii=False)

        # A bare filename has an empty dirname and os.makedirs('') raises
        # FileNotFoundError, so only create the parent when there is one.
        parent_dir = os.path.dirname(filepath)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(payload)
        logger.info(f"Saved data to {filepath}")
    except Exception as e:
        logger.error(f"Error saving JSON to {filepath}: {e}")

def validate_articles(articles: List[Dict]) -> List[Dict]:
    """
    Keep only the articles that carry every required field

    An article passes when 'title', 'content', 'author' and 'url' are all
    present with truthy values. Rejected articles are reported with a
    warning, and a final info message gives the accepted/total counts.

    Args:
        articles: List of article dictionaries

    Returns:
        The accepted articles, in their original order
    """
    mandatory = ('title', 'content', 'author', 'url')
    accepted = []

    for index, entry in enumerate(articles):
        for name in mandatory:
            # A field counts as missing when absent or present but empty.
            if name not in entry or not entry[name]:
                logger.warning(f"Article {index} missing required fields: {entry.get('title', 'Unknown')}")
                break
        else:
            # No field failed the check, so the article is kept.
            accepted.append(entry)

    logger.info(f"Validated {len(accepted)} out of {len(articles)} articles")
    return accepted

def get_model_info():
    """Return a static catalogue of candidate base models and training hardware guidance"""
    # Per-model entries: human-readable description, memory note, and whether
    # the model is recommended.
    base_models = {
        "mistralai/Mistral-7B-Instruct-v0.1": dict(
            description="High-quality 7B parameter model, excellent for fine-tuning",
            memory_requirement="~14GB GPU memory with 4-bit quantization",
            recommended=True,
        ),
        "meta-llama/Llama-2-7b-chat-hf": dict(
            description="Popular 7B chat model, good performance",
            memory_requirement="~14GB GPU memory with 4-bit quantization",
            recommended=True,
        ),
        "microsoft/DialoGPT-medium": dict(
            description="Smaller model, faster training but lower quality",
            memory_requirement="~4GB GPU memory",
            recommended=False,
        ),
    }

    # General hardware and duration guidance, expressed as display strings.
    training_requirements = dict(
        minimum_gpu_memory="8GB",
        recommended_gpu_memory="16GB+",
        training_time_estimate="4-6 hours on RTX 3080",
        cpu_training="Possible but very slow (24+ hours)",
    )

    return {
        "base_models": base_models,
        "training_requirements": training_requirements,
    }

def check_system_requirements():
    """
    Probe the local environment for training prerequisites

    Returns:
        Dict with keys 'python_version', 'torch_available', 'cuda_available',
        'gpu_memory' (total memory of CUDA device 0 in units of 1e9 bytes, or
        0 when no CUDA device is usable) and 'disk_space'.
        NOTE(review): 'python_version' and 'disk_space' are always True here;
        nothing in this function actually measures them.
    """
    has_torch = False
    has_cuda = False
    gpu_gigabytes = 0

    try:
        # Imported lazily so the module works without torch installed.
        import torch
        has_torch = True

        if torch.cuda.is_available():
            has_cuda = True
            device_props = torch.cuda.get_device_properties(0)
            gpu_gigabytes = device_props.total_memory / 1e9

    except ImportError:
        pass

    return {
        "python_version": True,
        "torch_available": has_torch,
        "cuda_available": has_cuda,
        "gpu_memory": gpu_gigabytes,
        "disk_space": True,
    }

def estimate_training_time(num_articles: int, gpu_memory: float, *, num_epochs: int = 3) -> str:
    """
    Estimate training time based on dataset size and hardware

    The estimate is ``num_articles * minutes_per_article * num_epochs``,
    where the per-article cost depends on the GPU memory tier.

    Args:
        num_articles: Number of training articles
        gpu_memory: GPU memory in GB
        num_epochs: Number of passes over the dataset (keyword-only;
            defaults to 3, the value previously hard-coded)

    Returns:
        Estimated training time string: whole minutes ("~15 minutes") when
        under an hour, otherwise hours with one decimal ("~2.5 hours")
    """
    # Minutes per article for each hardware tier.
    if gpu_memory >= 16:
        base_time = 0.5
    elif gpu_memory >= 8:
        base_time = 1.0
    else:
        base_time = 5.0  # below 8 GB is costed as CPU training

    total_minutes = num_articles * base_time * num_epochs

    if total_minutes < 60:
        return f"~{int(total_minutes)} minutes"

    hours = total_minutes / 60
    return f"~{hours:.1f} hours"

def create_project_summary() -> Dict:
    """
    Create a summary of the project status

    All paths are checked relative to the current working directory.

    Returns:
        Dict with keys:
            timestamp: ISO-formatted local time of the check
            files_created: Expected project files that exist
            data_status: Item counts for the raw and training data files
                that exist and load as non-empty JSON
            model_status: 'lora_adapters' -> "Available" or "Not trained"
            next_steps: Numbered instructions for what to run next
    """
    summary = {
        "timestamp": datetime.now().isoformat(),
        "files_created": [],
        "data_status": {},
        "model_status": {},
        "next_steps": []
    }

    # Check which files exist
    files_to_check = [
        "requirements.txt",
        "app.py",
        "src/scraper.py",
        "src/preprocess.py",
        "src/finetune.py",
        "src/utils.py"
    ]
    summary["files_created"] = [
        file_path for file_path in files_to_check if os.path.exists(file_path)
    ]

    # Check data status: a file only counts when it loads as non-empty JSON
    data_files = {
        "raw_articles": "data/raw_articles.json",
        "training_examples": "data/train_dataset.json",
    }
    for status_key, data_path in data_files.items():
        if os.path.exists(data_path):
            loaded = load_json(data_path)
            if loaded:
                summary["data_status"][status_key] = len(loaded)

    # Check model status. ensure_directories() pre-creates the adapter
    # directory, so its mere existence says nothing about training; require
    # at least one non-hidden entry (ignoring placeholders such as .gitkeep).
    adapter_dir = "models/lora_adapters"
    adapters_present = os.path.isdir(adapter_dir) and any(
        not entry.startswith(".") for entry in os.listdir(adapter_dir)
    )
    summary["model_status"]["lora_adapters"] = (
        "Available" if adapters_present else "Not trained"
    )

    # Determine next steps: pick the slice of the pipeline still outstanding
    pipeline = [
        "Run scraper to collect articles",
        "Run preprocessing to prepare training data",
        "Run fine-tuning to train the model",
        "Launch the Gradio app",
        "Test article generation",
    ]
    if not summary["data_status"]:
        remaining = pipeline[0:4]
    elif "training_examples" not in summary["data_status"]:
        remaining = pipeline[1:4]
    elif not adapters_present:
        remaining = pipeline[2:4]
    else:
        remaining = pipeline[3:5]

    summary["next_steps"] = [
        f"{number}. {step}" for number, step in enumerate(remaining, start=1)
    ]

    return summary

def print_project_status():
    """Render the project summary as a human-readable report on stdout"""
    report = create_project_summary()
    rule = "=" * 60
    existing_files = report['files_created']
    data_counts = report['data_status']

    # Banner
    print("\n" + rule)
    print("🤖 IAIN MORRIS ARTICLE GENERATOR - PROJECT STATUS")
    print(rule)

    print(f"\n📅 Last Updated: {report['timestamp']}")

    # Project files found on disk
    print(f"\n📁 Files Created ({len(existing_files)}):")
    for path in existing_files:
        print(f"  ✅ {path}")

    # Dataset counts, or a placeholder when nothing has been collected
    print("\n📊 Data Status:")
    if not data_counts:
        print("  ❌ No data collected yet")
    else:
        for label, count in data_counts.items():
            print(f"  📈 {label}: {count}")

    # Model artefacts: a tick only for entries reported as "Available"
    print("\n🤖 Model Status:")
    for label, state in report['model_status'].items():
        if state == "Available":
            marker = "✅"
        else:
            marker = "❌"
        print(f"  {marker} {label}: {state}")

    print("\n🎯 Next Steps:")
    for instruction in report['next_steps']:
        print(f"  {instruction}")

    print("\n" + rule)

# Script entry point: running this module directly prints the project status.
if __name__ == "__main__":
    # Configure logging first so the directory messages below are emitted.
    setup_logging()
    # Create the expected directories before the status check inspects them.
    ensure_directories()
    print_project_status()