import multiprocessing as mp
import os
import tempfile
import time
from pathlib import Path
from typing import Callable

import pandas as pd
from data import generate_data, read_csv
from plots import plot_predictions

def EMPTY_DF():
    """Return a new zero-row DataFrame with Equation, Loss and Complexity columns.

    A fresh frame is built on every call, so callers may mutate the result
    without affecting each other.
    """
    column_names = ("Equation", "Loss", "Complexity")
    return pd.DataFrame({name: [] for name in column_names})


def pysr_fit(queue: mp.Queue, out_queue: mp.Queue):
    """Worker loop that runs one PySR fit per job received on ``queue``.

    Each job is a dict with the keys ``"X"``, ``"y"`` and ``"kwargs"``; the
    latter is forwarded to ``pysr.PySRRegressor``. After every completed fit a
    single ``None`` is put on ``out_queue`` as the completion signal. Receiving
    ``None`` on ``queue`` ends the loop.
    """
    # Imported up front, before blocking on the first job.
    import pysr

    while (job := queue.get()) is not None:
        features, target, options = job["X"], job["y"], job["kwargs"]
        regressor = pysr.PySRRegressor(
            progress=False,
            timeout_in_seconds=1000,
            **options,
        )
        regressor.fit(features, target)
        # Tell the parent that this job has finished.
        out_queue.put(None)


def _copy_if_present(source: str, destination: str) -> None:
    """Copy ``source`` to ``destination`` without going through a shell.

    Failures are reported on stderr and otherwise ignored, matching the
    best-effort behaviour of the ``cp`` shell command this replaces.
    """
    import shutil
    import sys

    try:
        shutil.copyfile(source, destination)
    except OSError as err:
        print(f"Could not copy {source} to {destination}: {err}", file=sys.stderr)


def pysr_predict(queue: mp.Queue, out_queue: mp.Queue):
    """Worker loop that answers prediction requests received on ``queue``.

    Each request is a dict with the keys ``"X"``, ``"equation_file"`` (path of
    the ``.csv`` hall of fame) and ``"index"`` (forwarded to
    ``model.predict``). For every request exactly one reply is put on
    ``out_queue``: a dict with ``"ypred"`` and ``"equations"``, the latter
    being a DataFrame with the columns ``Complexity``, ``Loss`` and
    ``Equation``. Receiving ``None`` on ``queue`` ends the loop.
    """
    while True:
        args = queue.get()

        if args is None:
            break

        X = args["X"]
        equation_file = str(args["equation_file"])
        index = args["index"]

        equation_file_pkl = equation_file.replace(".csv", ".pkl")
        equation_file_bkup = equation_file + ".bkup"

        equation_file_copy = equation_file.replace(".csv", "_copy.csv")
        equation_file_pkl_copy = equation_file.replace(".csv", "_copy.pkl")

        # Note that we import pysr late in this process to avoid
        # pre-compiling the code in two places at once
        import pysr

        model = None
        while model is None:
            # Work on private copies so the files are not read while they
            # are being rewritten.
            # TODO: See if there is way to get lock on file
            _copy_if_present(equation_file_bkup, equation_file_copy)
            _copy_if_present(equation_file_pkl, equation_file_pkl_copy)

            try:
                model = pysr.PySRRegressor.from_file(
                    equation_file_pkl_copy, verbosity=0
                )
            except pd.errors.EmptyDataError:
                # The snapshot caught an empty equation file. The requester
                # blocks until it gets a reply, so dropping the request would
                # hang it; take a new snapshot and try again instead.
                time.sleep(0.1)

        ypred = model.predict(X, index)

        equations = model.equations_[["complexity", "loss", "equation"]].copy()

        # Remove any row that has worse loss than previous row:
        equations = equations[equations["loss"].cummin() == equations["loss"]]
        # TODO: Why is this needed? Are rows not being removed?

        # Rename the columns to the capitalized names used in the reply
        equations.columns = ["Complexity", "Loss", "Equation"]
        out_queue.put(dict(ypred=ypred, equations=equations))


class ProcessWrapper:
    """A started worker process together with its request and reply queues.

    ``target`` is called in the child process as ``target(queue, out_queue)``.
    Both queues hold at most one item.
    """

    def __init__(self, target: Callable[[mp.Queue, mp.Queue], None]):
        requests = mp.Queue(maxsize=1)
        replies = mp.Queue(maxsize=1)
        worker = mp.Process(target=target, args=(requests, replies))
        # The process is launched immediately on construction.
        worker.start()

        self.queue = requests
        self.out_queue = replies
        self.process = worker


# Module-wide worker handles. Both start out unset and are filled in by
# `processing` the first time it runs.
PERSISTENT_WRITER: "ProcessWrapper | None" = None  # runs `pysr_fit`
PERSISTENT_READER: "ProcessWrapper | None" = None  # runs `pysr_predict`


def _discard_stale_signals(out_queue: mp.Queue) -> None:
    """Remove completion signals that an earlier run left in ``out_queue``."""
    from queue import Empty

    while not out_queue.empty():
        try:
            out_queue.get(timeout=1)
        except Empty:
            # Something else consumed the item first; nothing left to discard.
            break


def processing(
    *,
    file_input,
    force_run,
    test_equation,
    num_points,
    noise_level,
    data_seed,
    niterations,
    maxsize,
    binary_operators,
    unary_operators,
    plot_update_delay,
    parsimony,
    populations,
    population_size,
    ncycles_per_iteration,
    elementwise_loss,
    adaptive_parsimony_scaling,
    optimizer_algorithm,
    optimizer_iterations,
    batching,
    batch_size,
    **kwargs,
):
    """Run a PySR search in a worker process and stream its progress.

    This is a generator. Every yielded value is a 3-tuple
    ``(equations, plot, status)``: a DataFrame with the columns
    ``Complexity``, ``Loss`` and ``Equation``, the object returned by
    ``plot_predictions``, and a status string.

    The data comes from ``read_csv(file_input, force_run)`` when
    ``file_input`` is given, otherwise from
    ``generate_data(test_equation, num_points, noise_level, data_seed)``.
    If ``read_csv`` raises ``ValueError``, a single tuple carrying the error
    message as status is yielded and the generator ends.

    The search parameters (``niterations`` through ``batch_size``, except
    ``plot_update_delay``) are forwarded to the fit worker.
    ``plot_update_delay`` and any extra keyword arguments are accepted but
    not used here.
    """
    global PERSISTENT_WRITER
    global PERSISTENT_READER

    # A worker that has exited can never pick up a job, so it is replaced too.
    if PERSISTENT_WRITER is None or not PERSISTENT_WRITER.process.is_alive():
        print("Starting PySR fit process")
        PERSISTENT_WRITER = ProcessWrapper(pysr_fit)

    if PERSISTENT_READER is None or not PERSISTENT_READER.process.is_alive():
        print("Starting PySR predict process")
        PERSISTENT_READER = ProcessWrapper(pysr_predict)

    if file_input is not None:
        try:
            X, y = read_csv(file_input, force_run)
        except ValueError as e:
            # A value passed to `return` inside a generator is not delivered
            # to whoever iterates it, so the error has to be yielded.
            yield (EMPTY_DF(), plot_predictions([], []), str(e))
            return
    else:
        X, y = generate_data(test_equation, num_points, noise_level, data_seed)

    tmpdirname = tempfile.mkdtemp()
    base = Path(tmpdirname)
    equation_file = base / "hall_of_fame.csv"
    pickle_file = equation_file.with_suffix(".pkl")

    # Check if queue is empty, if not, kill the process
    # and start a new one
    if not PERSISTENT_WRITER.queue.empty():
        print("Restarting PySR fit process")
        if PERSISTENT_WRITER.process.is_alive():
            PERSISTENT_WRITER.process.terminate()
            PERSISTENT_WRITER.process.join()

        PERSISTENT_WRITER = ProcessWrapper(pysr_fit)

    if not PERSISTENT_READER.queue.empty():
        print("Restarting PySR predict process")
        if PERSISTENT_READER.process.is_alive():
            PERSISTENT_READER.process.terminate()
            PERSISTENT_READER.process.join()

        PERSISTENT_READER = ProcessWrapper(pysr_predict)

    # The completion signal of a previous run stays in the queue unless it
    # is removed here; left in place it would make this run look finished
    # before it has started.
    _discard_stale_signals(PERSISTENT_WRITER.out_queue)

    PERSISTENT_WRITER.queue.put(
        dict(
            X=X,
            y=y,
            kwargs=dict(
                niterations=niterations,
                maxsize=maxsize,
                binary_operators=binary_operators,
                unary_operators=unary_operators,
                equation_file=equation_file,
                parsimony=parsimony,
                populations=populations,
                population_size=population_size,
                ncycles_per_iteration=ncycles_per_iteration,
                elementwise_loss=elementwise_loss,
                adaptive_parsimony_scaling=adaptive_parsimony_scaling,
                optimizer_algorithm=optimizer_algorithm,
                optimizer_iterations=optimizer_iterations,
                batching=batching,
                batch_size=batch_size,
            ),
        )
    )

    last_yield = (
        pd.DataFrame({"Complexity": [], "Loss": [], "Equation": []}),
        plot_predictions([], []),
        "Started!",
    )

    yield last_yield

    # Poll until the fit worker signals completion. Also stop if the worker
    # has exited, because then the signal will never arrive.
    while (
        PERSISTENT_WRITER.out_queue.empty()
        and PERSISTENT_WRITER.process.is_alive()
    ):
        if equation_file.exists() and pickle_file.exists():
            # Ask the predict worker to evaluate the current equations.
            PERSISTENT_READER.queue.put(
                dict(
                    X=X,
                    equation_file=equation_file,
                    index=-1,
                )
            )
            out = PERSISTENT_READER.out_queue.get()
            predictions = out["ypred"]
            equations = out["equations"]
            last_yield = (
                equations[["Complexity", "Loss", "Equation"]],
                plot_predictions(y, predictions),
                "Running...",
            )
            yield last_yield

        time.sleep(0.1)

    if PERSISTENT_WRITER.out_queue.empty():
        # The loop ended without a completion signal: the worker died.
        status = "Error: the PySR fit process exited unexpectedly"
    else:
        status = "Done"

    yield (*last_yield[:-1], status)