"""
Scraping and Rating API Endpoints
================================

FastAPI endpoints for web scraping and data rating functionality.
Provides comprehensive API for managing scraping jobs, monitoring progress,
and retrieving rating data.
"""

import logging
from typing import List, Optional, Dict, Any
from datetime import datetime
from fastapi import APIRouter, HTTPException, BackgroundTasks, Query, Depends
from fastapi.responses import JSONResponse
from pydantic import BaseModel, Field, HttpUrl
from enum import Enum

from ..services.scraping_service import ScrapingService, ScrapingStrategy
from ..services.rating_service import RatingService

logger = logging.getLogger(__name__)

# Initialize services
scraping_service = ScrapingService()
rating_service = RatingService()

# Request/Response Models


class ScrapingStrategyEnum(str, Enum):
    """Available scraping strategies for API"""
    GENERAL = "general"
    LEGAL_DOCUMENTS = "legal_documents"
    NEWS_ARTICLES = "news_articles"
    ACADEMIC_PAPERS = "academic_papers"
    GOVERNMENT_SITES = "government_sites"
    CUSTOM = "custom"


class ScrapingRequest(BaseModel):
    """Request model for starting a scraping job"""
    urls: List[str] = Field(..., description="List of URLs to scrape")
    strategy: ScrapingStrategyEnum = Field(
        default=ScrapingStrategyEnum.GENERAL, description="Scraping strategy to use")
    keywords: Optional[List[str]] = Field(
        default=None, description="Keywords to filter content")
    content_types: Optional[List[str]] = Field(
        default=None, description="Content types to focus on")
    max_depth: int = Field(default=1, ge=1, le=5,
                           description="Maximum depth for recursive scraping")
    delay_between_requests: float = Field(
        default=1.0, ge=0.1, le=10.0, description="Delay between requests in seconds")


class ScrapingJobResponse(BaseModel):
    """Response model for scraping job"""
    job_id: str
    status: str
    total_items: int
    completed_items: int
    failed_items: int
    progress: float
    created_at: str
    strategy: str


class ScrapedItemResponse(BaseModel):
    """Response model for scraped item"""
    id: str
    url: str
    title: str
    content: str
    metadata: Dict[str, Any]
    timestamp: str
    source_url: str
    rating_score: float
    processing_status: str
    error_message: Optional[str]
    strategy_used: str
    content_hash: str
    word_count: int
    language: str
    domain: str


class RatingSummaryResponse(BaseModel):
    """Response model for rating summary"""
    total_rated: int
    average_score: float
    score_range: Dict[str, float]
    average_confidence: float
    rating_level_distribution: Dict[str, int]
    criteria_averages: Dict[str, float]
    recent_ratings_24h: int


class ScrapingStatisticsResponse(BaseModel):
    """Response model for scraping statistics"""
    total_items: int
    status_distribution: Dict[str, int]
    language_distribution: Dict[str, int]
    average_rating: float
    active_jobs: int
    total_jobs: int


# Create router
router = APIRouter()


@router.post("/scrape", response_model=Dict[str, str])
async def start_scraping_job(request: ScrapingRequest, background_tasks: BackgroundTasks):
    """
    Start a new scraping job

    - **urls**: List of URLs to scrape
    - **strategy**: Scraping strategy to use
    - **keywords**: Optional keywords to filter content
    - **content_types**: Optional content types to focus on
    - **max_depth**: Maximum depth for recursive scraping (1-5)
    - **delay_between_requests**: Delay between requests in seconds (0.1-10.0)
    """
    try:
        # Convert strategy enum to service enum
        strategy_map = {
            ScrapingStrategyEnum.GENERAL: ScrapingStrategy.GENERAL,
            ScrapingStrategyEnum.LEGAL_DOCUMENTS: ScrapingStrategy.LEGAL_DOCUMENTS,
            ScrapingStrategyEnum.NEWS_ARTICLES: ScrapingStrategy.NEWS_ARTICLES,
            ScrapingStrategyEnum.ACADEMIC_PAPERS: ScrapingStrategy.ACADEMIC_PAPERS,
            ScrapingStrategyEnum.GOVERNMENT_SITES: ScrapingStrategy.GOVERNMENT_SITES,
            ScrapingStrategyEnum.CUSTOM: ScrapingStrategy.CUSTOM
        }

        strategy = strategy_map[request.strategy]

        # Start scraping job
        job_id = await scraping_service.start_scraping_job(
            urls=request.urls,
            strategy=strategy,
            keywords=request.keywords,
            content_types=request.content_types,
            max_depth=request.max_depth,
            delay=request.delay_between_requests
        )

        logger.info(
            f"Started scraping job {job_id} with {len(request.urls)} URLs")

        return {
            "job_id": job_id,
            "status": "started",
            "message": f"Scraping job started successfully with {len(request.urls)} URLs"
        }

    except Exception as e:
        logger.error(f"Error starting scraping job: {e}")
        raise HTTPException(
            status_code=500, detail=f"Failed to start scraping job: {str(e)}")


@router.get("/scrape/status", response_model=List[ScrapingJobResponse])
async def get_scraping_jobs_status():
    """
    Get status of all scraping jobs

    Returns list of all active and recent scraping jobs with their progress.
    """
    try:
        jobs = await scraping_service.get_all_jobs()
        return [ScrapingJobResponse(**job) for job in jobs if job is not None]

    except Exception as e:
        logger.error(f"Error getting scraping jobs status: {e}")
        raise HTTPException(
            status_code=500, detail=f"Failed to get scraping jobs status: {str(e)}")


@router.get("/scrape/status/{job_id}", response_model=ScrapingJobResponse)
async def get_scraping_job_status(job_id: str):
    """
    Get status of a specific scraping job

    - **job_id**: ID of the scraping job to check
    """
    try:
        job_status = await scraping_service.get_job_status(job_id)
        if not job_status:
            raise HTTPException(
                status_code=404, detail=f"Scraping job {job_id} not found")

        return ScrapingJobResponse(**job_status)

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error getting scraping job status: {e}")
        raise HTTPException(
            status_code=500, detail=f"Failed to get scraping job status: {str(e)}")


@router.get("/scrape/items", response_model=List[ScrapedItemResponse])
async def get_scraped_items(
    job_id: Optional[str] = Query(None, description="Filter by job ID"),
    limit: int = Query(100, ge=1, le=1000,
                       description="Maximum number of items to return"),
    offset: int = Query(0, ge=0, description="Number of items to skip")
):
    """
    Get scraped items with optional filtering

    - **job_id**: Optional job ID to filter items
    - **limit**: Maximum number of items to return (1-1000)
    - **offset**: Number of items to skip for pagination
    """
    try:
        items = await scraping_service.get_scraped_items(
            job_id=job_id,
            limit=limit,
            offset=offset
        )

        return [ScrapedItemResponse(**item) for item in items]

    except Exception as e:
        logger.error(f"Error getting scraped items: {e}")
        raise HTTPException(
            status_code=500, detail=f"Failed to get scraped items: {str(e)}")


@router.get("/scrape/statistics", response_model=ScrapingStatisticsResponse)
async def get_scraping_statistics():
    """
    Get comprehensive scraping statistics

    Returns overall statistics about scraped items, jobs, and system health.
    """
    try:
        stats = await scraping_service.get_scraping_statistics()
        return ScrapingStatisticsResponse(**stats)

    except Exception as e:
        logger.error(f"Error getting scraping statistics: {e}")
        raise HTTPException(
            status_code=500, detail=f"Failed to get scraping statistics: {str(e)}")


@router.post("/rating/rate/{item_id}")
async def rate_specific_item(item_id: str):
    """
    Rate a specific scraped item

    - **item_id**: ID of the item to rate
    """
    try:
        # Get item data
        items = await scraping_service.get_scraped_items(limit=1000)
        item_data = None

        for item in items:
            if item['id'] == item_id:
                item_data = item
                break

        if not item_data:
            raise HTTPException(
                status_code=404, detail=f"Item {item_id} not found")

        # Rate the item
        rating_result = await rating_service.rate_item(item_data)

        return {
            "item_id": item_id,
            "rating_result": rating_result.to_dict(),
            "message": f"Item {item_id} rated successfully"
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error rating item {item_id}: {e}")
        raise HTTPException(
            status_code=500, detail=f"Failed to rate item: {str(e)}")


@router.post("/rating/rate-all")
async def rate_all_unrated_items():
    """
    Rate all unrated scraped items

    Automatically rates all items that haven't been rated yet.
    """
    try:
        # Get all items
        items = await scraping_service.get_scraped_items(limit=1000)
        unrated_items = [item for item in items if item['rating_score'] == 0.0]

        rated_count = 0
        failed_count = 0

        for item in unrated_items:
            try:
                await rating_service.rate_item(item)
                rated_count += 1
            except Exception as e:
                logger.error(f"Failed to rate item {item['id']}: {e}")
                failed_count += 1

        return {
            "total_items": len(unrated_items),
            "rated_count": rated_count,
            "failed_count": failed_count,
            "message": f"Rated {rated_count} items, {failed_count} failed"
        }

    except Exception as e:
        logger.error(f"Error rating all items: {e}")
        raise HTTPException(
            status_code=500, detail=f"Failed to rate all items: {str(e)}")


@router.get("/rating/summary", response_model=RatingSummaryResponse)
async def get_rating_summary():
    """
    Get comprehensive rating summary

    Returns overall statistics about rated items, score distributions, and criteria averages.
    """
    try:
        summary = await rating_service.get_rating_summary()
        return RatingSummaryResponse(**summary)

    except Exception as e:
        logger.error(f"Error getting rating summary: {e}")
        raise HTTPException(
            status_code=500, detail=f"Failed to get rating summary: {str(e)}")


@router.get("/rating/history/{item_id}")
async def get_item_rating_history(item_id: str):
    """
    Get rating history for a specific item

    - **item_id**: ID of the item to get history for
    """
    try:
        history = await rating_service.get_item_rating_history(item_id)
        return {
            "item_id": item_id,
            "history": history,
            "total_changes": len(history)
        }

    except Exception as e:
        logger.error(f"Error getting rating history for item {item_id}: {e}")
        raise HTTPException(
            status_code=500, detail=f"Failed to get rating history: {str(e)}")


@router.post("/rating/re-evaluate/{item_id}")
async def re_evaluate_item(item_id: str):
    """
    Re-evaluate a specific item

    - **item_id**: ID of the item to re-evaluate
    """
    try:
        rating_result = await rating_service.re_evaluate_item(item_id)

        if not rating_result:
            raise HTTPException(
                status_code=404, detail=f"Item {item_id} not found")

        return {
            "item_id": item_id,
            "rating_result": rating_result.to_dict(),
            "message": f"Item {item_id} re-evaluated successfully"
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error re-evaluating item {item_id}: {e}")
        raise HTTPException(
            status_code=500, detail=f"Failed to re-evaluate item: {str(e)}")


@router.get("/rating/low-quality")
async def get_low_quality_items(
    threshold: float = Query(
        0.4, ge=0.0, le=1.0, description="Quality threshold"),
    limit: int = Query(
        50, ge=1, le=200, description="Maximum number of items to return")
):
    """
    Get items with low quality ratings

    - **threshold**: Quality threshold (0.0-1.0)
    - **limit**: Maximum number of items to return (1-200)
    """
    try:
        items = await rating_service.get_low_quality_items(threshold=threshold, limit=limit)

        return {
            "threshold": threshold,
            "total_items": len(items),
            "items": items
        }

    except Exception as e:
        logger.error(f"Error getting low quality items: {e}")
        raise HTTPException(
            status_code=500, detail=f"Failed to get low quality items: {str(e)}")


@router.delete("/scrape/cleanup")
async def cleanup_old_jobs(days: int = Query(7, ge=1, le=30, description="Days to keep jobs")):
    """
    Clean up old completed jobs

    - **days**: Number of days to keep jobs (1-30)
    """
    try:
        await scraping_service.cleanup_old_jobs(days=days)

        return {
            "message": f"Cleaned up jobs older than {days} days",
            "days": days
        }

    except Exception as e:
        logger.error(f"Error cleaning up old jobs: {e}")
        raise HTTPException(
            status_code=500, detail=f"Failed to cleanup old jobs: {str(e)}")


@router.get("/health")
async def scraping_health_check():
    """
    Health check for scraping and rating services

    Returns status of both scraping and rating services.
    """
    try:
        # Check scraping service
        scraping_stats = await scraping_service.get_scraping_statistics()

        # Check rating service
        rating_summary = await rating_service.get_rating_summary()

        return {
            "status": "healthy",
            "timestamp": datetime.now().isoformat(),
            "services": {
                "scraping": {
                    "active_jobs": scraping_stats.get('active_jobs', 0),
                    "total_items": scraping_stats.get('total_items', 0)
                },
                "rating": {
                    "total_rated": rating_summary.get('total_rated', 0),
                    "average_score": rating_summary.get('average_score', 0)
                }
            }
        }

    except Exception as e:
        logger.error(f"Health check failed: {e}")
        return {
            "status": "unhealthy",
            "timestamp": datetime.now().isoformat(),
            "error": str(e)
        }