# tools/visuals.py  —  reusable Plotly helpers
# ------------------------------------------------------------

import os
import tempfile
from typing import List, Tuple, Union

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from scipy.cluster.hierarchy import linkage, leaves_list
from scipy.stats import gaussian_kde

# -----------------------------------------------------------------
# Typing alias: `Plot` is plotly.graph_objects.Figure.
# The public *_tool helpers below return (Plot, png_path) on success
# and a plain error-message string on failure.
# -----------------------------------------------------------------
Plot = go.Figure


# -----------------------------------------------------------------
# Utility: save figure to high‑res PNG under a writable dir (/tmp)
# -----------------------------------------------------------------
def _save_fig(fig: Plot, prefix: str, outdir: str = "/tmp") -> str:
    """Render *fig* to a uniquely named PNG inside *outdir* and return its path.

    Args:
        fig: Object exposing ``write_image(path, scale=...)``.
        prefix: Leading part of the generated file name. Path separators are
            replaced with ``_`` so that a prefix derived from a column name
            such as ``"a/b"`` cannot point into a non-existent sub-directory.
        outdir: Target directory; created if it does not exist.

    Returns:
        Path of the written PNG file.

    Raises:
        Whatever ``fig.write_image`` raises. The reserved temp file is
        removed first so a failed export leaves no empty ``.png`` behind.
    """
    os.makedirs(outdir, exist_ok=True)
    safe_prefix = prefix.replace("/", "_").replace("\\", "_")

    # mkstemp reserves a unique name; close the descriptor right away so no
    # handle stays open while (or after) the image is written.
    fd, path = tempfile.mkstemp(prefix=safe_prefix, suffix=".png", dir=outdir)
    os.close(fd)

    try:
        # scale=3 triples the pixel resolution of the exported image.
        fig.write_image(path, scale=3)
    except BaseException:
        try:
            os.remove(path)
        except OSError:
            # Best-effort cleanup; the original error is what matters.
            pass
        raise
    return path


# -----------------------------------------------------------------
# 1) Histogram (+ optional KDE)
# -----------------------------------------------------------------
def histogram_tool(
    file_path: str,
    column: str,
    bins: int = 30,
    kde: bool = True,
    output_dir: str = "/tmp",
) -> Union[Tuple[Plot, str], str]:
    """Plot a histogram of one numeric column, optionally with a KDE overlay.

    Args:
        file_path: Table to load; ``.xls``/``.xlsx`` are read with
            ``pd.read_excel``, anything else with ``pd.read_csv``.
        column: Name of the column to plot. Values are coerced to numeric
            and non-convertible entries are dropped.
        bins: Number of histogram bins.
        kde: When true, draw count bars from ``np.histogram`` plus a Gaussian
            kernel density estimate rescaled to the count axis. When false,
            delegate to ``px.histogram``.
        output_dir: Directory the PNG export is written to.

    Returns:
        ``(figure, png_path)`` on success, or an error-message string when
        the column is missing or holds no numeric data.
    """
    ext = os.path.splitext(file_path)[1].lower()
    df = pd.read_excel(file_path) if ext in (".xls", ".xlsx") else pd.read_csv(file_path)

    if column not in df.columns:
        return f"❌ Column '{column}' not found."
    series = pd.to_numeric(df[column], errors="coerce").dropna()
    if series.empty:
        return f"❌ No numeric data in '{column}'."

    title = f"Histogram – {column}"
    if kde:
        values = series.to_numpy(dtype=float)
        counts, edges = np.histogram(values, bins=bins)
        widths = np.diff(edges)
        # Bars are positioned by their centre, so shift from the left edge.
        centers = edges[:-1] + widths / 2

        fig = go.Figure()
        fig.add_bar(x=centers, y=counts, width=widths, name="Histogram")

        # A Gaussian KDE needs non-zero variance; skip the curve for
        # constant data instead of failing on a singular covariance.
        if values.max() > values.min():
            grid = np.linspace(values.min(), values.max(), 500)
            density = gaussian_kde(values)(grid)
            fig.add_scatter(
                x=grid,
                # density integrates to 1; multiply by N * bin width so the
                # curve shares the count axis with the bars.
                y=density * values.size * widths.mean(),
                mode="lines",
                name="KDE (approx)",
            )
    else:
        fig = px.histogram(
            series, nbins=bins, title=title, template="plotly_dark"
        )

    # Same theme and title for both branches.
    fig.update_layout(template="plotly_dark", title=title)
    return fig, _save_fig(fig, f"hist_{column}_", output_dir)


# -----------------------------------------------------------------
# 2) Box plot
# -----------------------------------------------------------------
def boxplot_tool(
    file_path: str,
    column: str,
    output_dir: str = "/tmp",
) -> Union[Tuple[Plot, str], str]:
    """Draw a box plot of one numeric column and export it as PNG.

    The table is read with ``pd.read_excel`` for ``.xls``/``.xlsx`` files and
    with ``pd.read_csv`` otherwise. Values are coerced to numeric and entries
    that cannot be converted are dropped.

    Returns:
        ``(figure, png_path)`` on success, or an error-message string when
        the column is missing or holds no numeric data.
    """
    _, suffix = os.path.splitext(file_path)
    if suffix.lower() in (".xls", ".xlsx"):
        frame = pd.read_excel(file_path)
    else:
        frame = pd.read_csv(file_path)

    if column not in frame.columns:
        return f"❌ Column '{column}' not found."

    values = pd.to_numeric(frame[column], errors="coerce").dropna()
    if values.empty:
        return f"❌ No numeric data in '{column}'."

    figure = px.box(
        values,
        points="outliers",
        title=f"Boxplot – {column}",
        template="plotly_dark",
    )
    png_path = _save_fig(figure, f"box_{column}_", output_dir)
    return figure, png_path


# -----------------------------------------------------------------
# 3) Violin plot
# -----------------------------------------------------------------
def violin_tool(
    file_path: str,
    column: str,
    output_dir: str = "/tmp",
) -> Union[Tuple[Plot, str], str]:
    """Draw a violin plot (with inner box and all points) of one numeric column.

    The table is read with ``pd.read_excel`` for ``.xls``/``.xlsx`` files and
    with ``pd.read_csv`` otherwise. Values are coerced to numeric and entries
    that cannot be converted are dropped. The figure is exported as PNG.

    Returns:
        ``(figure, png_path)`` on success, or an error-message string when
        the column is missing or holds no numeric data.
    """
    _, suffix = os.path.splitext(file_path)
    is_excel = suffix.lower() in (".xls", ".xlsx")
    frame = pd.read_excel(file_path) if is_excel else pd.read_csv(file_path)

    if column not in frame.columns:
        return f"❌ Column '{column}' not found."

    values = pd.to_numeric(frame[column], errors="coerce").dropna()
    if values.empty:
        return f"❌ No numeric data in '{column}'."

    figure = px.violin(
        values,
        box=True,
        points="all",
        title=f"Violin – {column}",
        template="plotly_dark",
    )
    png_path = _save_fig(figure, f"violin_{column}_", output_dir)
    return figure, png_path


# -----------------------------------------------------------------
# 4) Scatter‑matrix
# -----------------------------------------------------------------
def scatter_matrix_tool(
    file_path: str,
    columns: List[str],
    output_dir: str = "/tmp",
    size: int = 5,
) -> Union[Tuple[Plot, str], str]:
    """Draw a scatter matrix over the given columns and export it as PNG.

    The table is read with ``pd.read_excel`` for ``.xls``/``.xlsx`` files and
    with ``pd.read_csv`` otherwise. The selected columns are coerced to
    numeric and every row containing a non-convertible or missing value is
    dropped. Diagonal panels are hidden and markers are drawn with *size*.

    Returns:
        ``(figure, png_path)`` on success, or an error-message string when
        a requested column is absent or no complete numeric row remains.
    """
    _, suffix = os.path.splitext(file_path)
    if suffix.lower() in (".xls", ".xlsx"):
        frame = pd.read_excel(file_path)
    else:
        frame = pd.read_csv(file_path)

    absent = [name for name in columns if name not in frame.columns]
    if absent:
        return f"❌ Missing columns: {', '.join(absent)}"

    numeric = frame[columns].apply(pd.to_numeric, errors="coerce")
    numeric = numeric.dropna()
    if numeric.empty:
        return "❌ No valid numeric data."

    figure = px.scatter_matrix(
        numeric,
        dimensions=columns,
        title="Scatter Matrix",
        template="plotly_dark",
    )
    figure.update_traces(diagonal_visible=False, marker={"size": size})
    png_path = _save_fig(figure, "scatter_matrix_", output_dir)
    return figure, png_path


# -----------------------------------------------------------------
# 5) Correlation heat‑map (optional clustering)
# -----------------------------------------------------------------
def corr_heatmap_tool(
    file_path: str,
    columns: List[str] | None = None,
    output_dir: str = "/tmp",
    cluster: bool = True,
) -> Union[Tuple[Plot, str], str]:
    """Draw a correlation heat-map, optionally reordered by clustering.

    Args:
        file_path: Table to load; ``.xls``/``.xlsx`` are read with
            ``pd.read_excel``, anything else with ``pd.read_csv``.
        columns: Columns to correlate. ``None`` selects every column pandas
            already treats as numeric.
        output_dir: Directory the PNG export is written to.
        cluster: When true, rows and columns are reordered by the leaf order
            of an average-linkage hierarchical clustering of the matrix.

    Returns:
        ``(figure, png_path)`` on success, or an error-message string when
        requested columns are absent or fewer than two numeric columns
        remain.
    """
    ext = os.path.splitext(file_path)[1].lower()
    df = pd.read_excel(file_path) if ext in (".xls", ".xlsx") else pd.read_csv(file_path)

    if columns is None:
        df_num = df.select_dtypes("number")
    else:
        # Report unknown columns as a message (as scatter_matrix_tool does)
        # instead of letting df[columns] raise KeyError.
        missing = [c for c in columns if c not in df.columns]
        if missing:
            return f"❌ Missing columns: {', '.join(map(str, missing))}"
        df_num = df[columns]

    df_num = df_num.apply(pd.to_numeric, errors="coerce").dropna(axis=1, how="all")
    if df_num.shape[1] < 2:
        return "❌ Need ≥ 2 numeric columns."

    corr = df_num.corr()
    if cluster:
        # Constant columns give NaN correlations, and linkage rejects
        # non-finite input. Treat NaN as "no correlation" for ordering only;
        # the displayed matrix keeps its NaN cells.
        finite = corr.fillna(0.0).to_numpy()
        order = leaves_list(linkage(finite, "average"))
        corr = corr.iloc[order, order]

    fig = px.imshow(
        corr,
        color_continuous_scale="RdBu",
        # Pin the diverging scale to the full correlation range so that
        # zero always sits at the neutral midpoint colour.
        zmin=-1,
        zmax=1,
        title="Correlation Heat‑map",
        labels=dict(color="ρ"),
        template="plotly_dark",
    )
    return fig, _save_fig(fig, "corr_heatmap_", output_dir)