import logging
from pathlib import Path
from typing import Optional, Tuple

import matplotlib.pyplot as plt
import pandas as pd
import plotly.graph_objects as go
import requests
import seaborn as sns
from bs4 import BeautifulSoup
from wordcloud import WordCloud

from util import get_max_abs_int, snake_case_to_human_readable, int_csv_to_list


def _rename_columns(df: pd.DataFrame, is_tournament: bool) -> pd.DataFrame:
    columns = {
        "Rating": "rating",
        "Result": "result",
        "Scores": "scores",
        "Opponent": "opponent",
        "OpponentRating": "opponent_rating",
    }

    if is_tournament:
        columns.update({
            "TournamentStartDate": "tournament_start_date",
            "TournamentEndDate": "tournament_end_date",
            " Touranament": "tournament",
        })
    else:
        columns.update({
            "EventDate": "event_date",
            "LeagueName": "league_name"
        })

    return df.rename(columns=columns)


def _fix_dtypes(df: pd.DataFrame, is_tournament: bool) -> pd.DataFrame:
    if is_tournament:
        df["tournament_start_date"] = pd.to_datetime(df["tournament_start_date"])
        df["tournament_end_date"] = pd.to_datetime(df["tournament_end_date"])
        df["tournament"] = df["tournament"].astype('category')
    else:
        df["event_date"] = pd.to_datetime(df["event_date"])
        df["league_name"] = df["league_name"].astype('string')

    df["rating"] = df["rating"].astype('int')
    df["result"] = df["result"].astype('category')
    df["scores"] = df["scores"].astype('string')
    df["opponent"] = df["opponent"].astype('category')
    df["opponent_rating"] = df["opponent_rating"].astype('int')

    return df


def make_df_columns_readable(df: Optional[pd.DataFrame], is_tournament: bool) -> Optional[pd.DataFrame]:
    """Make a data frame's columns human-readable."""
    if df is None:
        return None

    nat_to_none = lambda x: None if x == "NaT" else x
    if is_tournament:
        if "tournament_start_date" in df.columns and "tournament_end_date" in df.columns:
            df['tournament_start_date'] = pd.to_datetime(df['tournament_start_date'])
            df['tournament_end_date'] = pd.to_datetime(df['tournament_end_date'])
            df['tournament_start_date'] = df['tournament_start_date'].dt.date.astype(str).apply(nat_to_none)
            df['tournament_end_date'] = df['tournament_end_date'].dt.date.astype(str).apply(nat_to_none)

            def create_date(tournament_start_date, tournament_end_date):
                missing_start_date = tournament_start_date is None
                missing_end_date = tournament_end_date is None
                if not missing_start_date and not missing_end_date:
                    if tournament_start_date is not tournament_end_date:
                        return ' - '.join((tournament_start_date, tournament_end_date))
                    else:
                        return tournament_start_date
                else:
                    return tournament_start_date if missing_end_date else tournament_end_date

            df["date"] = df.apply(lambda row: create_date(row['tournament_start_date'], row['tournament_end_date']),
                                  axis=1)
            df = df.drop(columns=["tournament_start_date", "tournament_end_date"])

            # Move date to the front.
            columns = list(df.columns)
            columns.insert(0, columns.pop(columns.index("date")))
            df = df.loc[:, columns]
    else:
        if "event_date" in df.columns:
            df['event_date'] = pd.to_datetime(df['event_date'])
            df['event_date'] = df['event_date'].dt.date.astype(str).apply(nat_to_none)
        df = df.rename(columns={"league_name": "league"})

    df = df.rename(columns=lambda c: snake_case_to_human_readable(c))
    return df


def _check_match_type(match_type: str) -> str:
    allowed_match_types = {"tournament", "league"}
    if match_type not in allowed_match_types:
        raise ValueError(
            f"The only supported match types are {allowed_match_types}. Found match type of '{match_type}'.")
    return match_type


def fetch_player_name(profile_id: int) -> str:
    """Fetch a player name from theUSATT website.

    note: the profile ID is NOT the USATT number.
    """
    url = f"https://usatt.simplycompete.com/userAccount/up/{profile_id}"
    logging.info(f"Fetching player name from {url}")
    page = requests.get(url)
    soup = BeautifulSoup(page.content, "html.parser")
    profile_elt = soup.find("div", class_="profile-header")
    return profile_elt.find(class_="title").text.strip()


def get_player_name(file_stem: str) -> str:
    profile_id = int(file_stem.split(" ")[0].replace("_", "").split("matches")[-1])
    return fetch_player_name(profile_id)


def get_num_competitions_played(df: pd.DataFrame, is_tournament: bool) -> int:
    key_name = "tournament_end_date" if is_tournament else "event_date"
    return df[key_name].nunique()


def get_first_competition_year(df: pd.DataFrame, is_tournament: bool) -> int:
    key_name = "tournament_end_date" if is_tournament else "event_date"
    return df[key_name].min().year


def get_num_active_years(df: pd.DataFrame, is_tournament: bool) -> int:
    key_name = "tournament_end_date" if is_tournament else "event_date"
    return df[key_name].dt.year.nunique()


def get_current_rating(df: pd.DataFrame) -> int:
    return df.rating.iloc[0]


def get_max_rating(df: pd.DataFrame) -> int:
    return df.rating.max()


def get_matches_per_competition_fig(df: pd.DataFrame, is_tournament: bool):
    fig = plt.figure()
    plt.title('Matches per competition')
    sns.histplot(df.groupby('tournament' if is_tournament else "event_date", observed=False).size())
    plt.xlabel('Number of matches in competition')
    return fig


def get_competition_name_word_cloud_fig(df: pd.DataFrame, is_tournament: bool):
    fig = plt.figure()
    key_name = "tournament" if is_tournament else "league_name"
    wordcloud = WordCloud().generate(" ".join(df[key_name].values.tolist()))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis("off")
    return fig


def get_opponent_name_word_cloud_fig(df: pd.DataFrame):
    fig = plt.figure()
    wordcloud = WordCloud().generate(" ".join(df.opponent.values.tolist()))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis("off")
    return fig


def get_rating_over_time_fig(df: pd.DataFrame, is_tournament: bool, span: int = 60):
    df['ema'] = df['rating'].ewm(span=span, adjust=False).mean()

    fig = go.Figure()

    # Raw rating over time trace
    x_key_name = "tournament_end_date" if is_tournament else "event_date"
    fig.add_trace(go.Scatter(x=df[x_key_name],
                             y=df["rating"],
                             name='Rating',
                             mode='lines+markers',
                             line=dict(width=0.9),
                             marker=dict(size=4))),

    # EMA trace
    fig.add_trace(go.Scatter(x=df[x_key_name],
                             y=df["ema"],
                             mode='lines',
                             name='Rating EMA',
                             visible='legendonly',
                             line=dict(width=1.5, dash='dot')))

    fig.update_layout(
        title='Rating over time',
        xaxis_title='Competition date',
        yaxis_title='Rating',
        showlegend=True,
        template="plotly_white",
    )

    return fig


def get_match_with_longest_game(df: pd.DataFrame, is_tournament: bool) -> Optional[pd.DataFrame]:
    if not is_tournament:
        return None
    df_non_null = df.loc[~df.scores.isna()]
    return df_non_null.iloc[[df_non_null.scores.apply(get_max_abs_int).argmax()]]


def get_win_loss_record_str(group_df) -> str:
    if len(group_df) > 0:
        win_loss_counts = group_df.value_counts()
        n_wins = win_loss_counts.Won if hasattr(win_loss_counts, "Won") else 0
        n_losses = win_loss_counts.Lost if hasattr(win_loss_counts, "Lost") else 0
    else:
        n_wins = 0
        n_losses = 0

    return f"{n_wins}, {n_losses}"


def get_most_frequent_opponents(df: pd.DataFrame, top_n: int = 5) -> pd.DataFrame:
    df_with_opponents = df.loc[df.opponent != "-, -"]

    most_common_opponents_df = df_with_opponents.groupby('opponent', observed=False).agg(
        {"result": [get_win_loss_record_str, "size"]})
    most_common_opponents_df.columns = most_common_opponents_df.columns.get_level_values(1)
    most_common_opponents_df.rename({"get_win_loss_record_str": "Win/loss record", "size": "Number of matches"}, axis=1,
                                    inplace=True)
    most_common_opponents_df["Opponent"] = most_common_opponents_df.index
    return most_common_opponents_df.sort_values("Number of matches", ascending=False)[
        ["Opponent", "Number of matches", "Win/loss record"]].head(top_n)


def get_best_wins(df: pd.DataFrame, top_n: int = 5) -> pd.DataFrame:
    """Get the top-n wins sorted by opponent rating."""
    return df.loc[df.result == 'Won'].sort_values("opponent_rating", ascending=False).head(top_n)


def get_biggest_upsets(df: pd.DataFrame, top_n: int = 5) -> pd.DataFrame:
    """Get the top-n wins sorted by rating difference."""
    df['rating_difference'] = df['opponent_rating'] - df['rating']
    return df.loc[df.result == 'Won'].sort_values("rating_difference", ascending=False).head(top_n)


def get_worst_recent_losses(df: pd.DataFrame,
                            is_tournament: bool,
                            top_k_losses: int = 5,
                            top_n_comps: int = 5) -> pd.DataFrame:
    """Get the top-k most recent worst losses from the top-n most recent competitions."""
    x_key_name = "tournament_end_date" if is_tournament else "event_date"
    most_recent_competition_dates = df.groupby(x_key_name).first().reset_index().nlargest(top_n_comps,
                                                                                          columns=x_key_name)[
        x_key_name]
    df_recent = df.loc[df[x_key_name].isin(most_recent_competition_dates)]
    return df_recent.loc[df_recent.result == 'Lost'].sort_values("opponent_rating", ascending=True).head(top_k_losses)


def get_best_competitions(df: pd.DataFrame, is_tournament: bool, top_n: int = 5) -> pd.DataFrame:
    # First add pre-competition ratings
    x_key_name = "tournament_end_date" if is_tournament else "event_date"
    grouped = df.groupby(x_key_name)

    # We incorrectly fill the first pre-competition rating to the first rating so that
    # the top-k rating differences make sense.
    fill_value = df.iloc[-1].rating
    pre_comp_ratings_by_group = grouped['rating'].first().shift(periods=1, fill_value=fill_value)

    def assign_pre_comp_rating(group_df):
        """Assign a pre-competition rating to a given group."""
        comp_end_date = group_df[x_key_name].unique()[0]
        group_df['pre-competition_rating'] = pre_comp_ratings_by_group.loc[comp_end_date]
        return group_df

    df = grouped.apply(lambda x: assign_pre_comp_rating(x))

    df['rating_increase'] = df['rating'] - df['pre-competition_rating']
    df.reset_index(drop=True, inplace=True)
    best_competition_dates = df.groupby(x_key_name)["rating_increase"].first().nlargest(top_n).index

    tournament_df = df.loc[df[x_key_name].isin(best_competition_dates)].groupby(
        [x_key_name]).first().sort_values(by='rating_increase', ascending=False).reset_index()

    cols = []
    if is_tournament:
        cols += ['tournament_start_date', 'tournament_end_date', 'tournament']
    else:
        cols += ["event_date", "league_name"]
    cols += ['rating_increase', 'pre-competition_rating', 'rating']

    tournament_df = tournament_df[cols]
    tournament_df = tournament_df.rename(columns={"rating": "post-competition_rating"})

    return tournament_df


def get_highest_rated_opponent(df: pd.DataFrame) -> pd.DataFrame:
    return df.iloc[df.opponent_rating.idxmax()].to_frame().transpose()


def get_opponent_rating_distr_fig(df: pd.DataFrame):
    fig = plt.figure()
    plt.title('Opponent rating distribution')
    sns.histplot(data=df, x="opponent_rating", hue='result')
    plt.xlabel('Opponent rating')
    return fig


def get_opponent_rating_dist_over_time_fig(df: pd.DataFrame, is_tournament: bool):
    fig, ax = plt.subplots(figsize=(12, 8))
    plt.title(f'Opponent rating distribution over time')
    x_key_name = "tournament_end_date" if is_tournament else "event_date"
    sns.violinplot(data=df,
                   x=df[x_key_name].dt.year,
                   y="opponent_rating",
                   hue="result",
                   split=True,
                   inner='points',
                   cut=1,
                   ax=ax)
    plt.xticks(rotation=30)
    plt.xlabel('Competition year')
    plt.ylabel('Opponent rating')
    return fig


def get_total_match_points(score_str: str) -> int:
    single_game_scores = int_csv_to_list(score_str)
    total_points = 0
    for single_game_score in single_game_scores:
        abs_gscore = abs(single_game_score)
        if abs_gscore < 10:
            total_points += abs_gscore + 11
        else:
            total_points += 2 * abs_gscore + 2
    return total_points


def get_longest_match(df: pd.DataFrame, is_tournament: bool) -> Optional[pd.DataFrame]:
    """Get the longest match, where longest is defined as the most number of points played."""
    if not is_tournament:
        return None
    df_non_null = df.loc[~df.scores.isna()]
    df_non_null["total_points"] = df_non_null.scores.apply(get_total_match_points)
    return df_non_null.iloc[[df_non_null["total_points"].argmax()]]


def load_match_df(file_path: Path) -> Tuple[pd.DataFrame, bool]:
    match_type = _check_match_type(file_path.name.split('_')[0])
    is_tournament = match_type == "tournament"

    df = pd.read_csv(file_path)
    df = _rename_columns(df, is_tournament)
    df = _fix_dtypes(df, is_tournament)

    logging.info(f"Loaded match CSV {file_path}.")

    return df, is_tournament