"""Gradio app that ranks Hugging Face Hub repositories and orgs/users by likes."""
import gradio as gr
from huggingface_hub import list_spaces, list_models, list_datasets
from cachetools import TTLCache, cached
from toolz import groupby, valmap
import platform
from enum import Enum

is_macos = platform.system() == "Darwin"
LIMIT = 1_000_000 if is_macos else None
NONE_AUTHOR = "HuggingFace Team"  # TODO deal with this


class HubRepoType(Enum):
    MODEL = "model"
    DATASET = "dataset"
    SPACE = "space"


@cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
def get_spaces():
    return list(list_spaces(full=True, limit=LIMIT))


@cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
def get_models():
    return list(list_models(full=True, limit=LIMIT))


@cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
def get_datasets():
    return list(list_datasets(full=True, limit=LIMIT))


get_spaces()  # to warm up the cache
get_models()  # to warm up the cache
get_datasets()  # to warm up the cache


@cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
def valid_dataset_ids():
    return {dataset.id for dataset in get_datasets()}


@cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
def valid_model_ids():
    return {model.id for model in get_models()}


@cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
def valid_space_ids():
    return {space.id for space in get_spaces()}


VALID_DATASET_IDS = valid_dataset_ids()
VALID_MODEL_IDS = valid_model_ids()
VALID_SPACE_IDS = valid_space_ids()


@cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
def create_space_to_like_dict():
    spaces = get_spaces()
    return {space.id: space.likes for space in spaces}


@cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
def create_org_to_space_like_dict():
    spaces = get_spaces()
    grouped = groupby(lambda x: x.author, spaces)
    return valmap(lambda x: sum(s.likes for s in x), grouped)


@cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
def create_model_to_like_dict(metric_kind):
    models = get_models()
    if metric_kind == "likes":
        return {model.id: model.likes for model in models}
    if metric_kind == "downloads":
        return {model.id: model.downloads for model in models}
    raise ValueError(f"Unsupported metric_kind: {metric_kind}")


@cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
def create_org_to_model_metrics(metric_kind="likes"):
    models = get_models()
    # remove authors who are None
    models = [model for model in models if model.author is not None]
    grouped = groupby(lambda x: x.author, models)
    if metric_kind == "likes":
        return valmap(lambda x: sum(s.likes for s in x), grouped)
    else:
        return valmap(lambda x: sum(s.downloads for s in x), grouped)


@cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
def create_dataset_to_like_dict(metric_kind="likes"):
    datasets = get_datasets()
    if metric_kind == "likes":
        return {dataset.id: dataset.likes for dataset in datasets}
    if metric_kind == "downloads":
        return {dataset.id: dataset.downloads for dataset in datasets}
    raise ValueError(f"Unsupported metric_kind: {metric_kind}")


@cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
def create_org_to_dataset_metrics(metric_kind="likes"):
    datasets = get_datasets()
    # remove authors who are None
    datasets = [dataset for dataset in datasets if dataset.author is not None]
    grouped = groupby(lambda x: x.author, datasets)
    if metric_kind == "likes":
        return valmap(lambda x: sum(s.likes for s in x), grouped)
    else:
        return valmap(lambda x: sum(s.downloads for s in x), grouped)


def relative_rank(my_dict, target_key, filter_zero=False):
    if filter_zero:
        my_dict = {k: v for k, v in my_dict.items() if v != 0}

    if target_key not in my_dict:
        raise gr.Error(f"'{target_key}' not found. Please check the ID and try again.")

    sorted_items = sorted(my_dict.items(), key=lambda item: item[1], reverse=True)
    position = [key for key, _ in sorted_items].index(target_key)

    num_lower = len(sorted_items) - position - 1
    num_higher = position

    return {
        "rank": (num_higher + 1) / len(my_dict) * 100,
        "num_higher": num_higher,
        "num_lower": num_lower,
        "value": my_dict[target_key],
        "position": num_higher + 1,
    }


@cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
def relative_rank_for_space(space_id, filter_zero=False):
    space_to_like_dict = create_space_to_like_dict()
    return relative_rank(space_to_like_dict, space_id, filter_zero=filter_zero)


@cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
def relative_rank_for_model(model_id, metric_kind="likes", filter_zero=False):
    model_to_like_dict = create_model_to_like_dict(metric_kind)
    return relative_rank(model_to_like_dict, model_id, filter_zero=filter_zero)


@cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
def relative_rank_for_dataset(dataset_id, metric_kind="likes", filter_zero=False):
    dataset_to_like_dict = create_dataset_to_like_dict(metric_kind)
    return relative_rank(dataset_to_like_dict, dataset_id, filter_zero=filter_zero)


@cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
def relative_space_rank_for_org(org_id, filter_zero=False):
    org_to_like_dict = create_org_to_space_like_dict()
    return relative_rank(org_to_like_dict, org_id, filter_zero=filter_zero)


@cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
def relative_model_rank_for_org(org_id, metric_kind="likes", filter_zero=False):
    org_to_like_dict = create_org_to_model_metrics(metric_kind)
    return relative_rank(org_to_like_dict, org_id, filter_zero=filter_zero)


@cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
def relative_dataset_rank_for_org(org_id, metric_kind="likes", filter_zero=False):
    org_to_like_dict = create_org_to_dataset_metrics(metric_kind)
    return relative_rank(org_to_like_dict, org_id, filter_zero=filter_zero)


# @cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
# def rank_space(space_id):
#     return relative_rank_for_space(space_id)


def rank_space_and_org(space_or_org_id, kind, filter_zero):
    filter_zero = filter_zero == "yes"
    split_length = len(space_or_org_id.split("/"))

    # Logic for split_length == 2: a full repo ID, e.g. "org/name"
    if split_length == 2:
        return _rank_single_repo(space_or_org_id, kind, filter_zero)

    # Handle kind-specific logic for split_length == 1
    if split_length == 1:
        valid_ids = {"model": VALID_MODEL_IDS, "dataset": VALID_DATASET_IDS}
        if kind in valid_ids and space_or_org_id in valid_ids[kind]:
            return _rank_single_repo(space_or_org_id, kind, filter_zero)
        else:
            return _rank_by_org(space_or_org_id, kind, filter_zero)

    # If no conditions match, handle unexpected cases (optional)
    raise ValueError(
        f"Unexpected combination of space_or_org_id '{space_or_org_id}' and kind"
        f" '{kind}'"
    )


def _rank_by_org(space_or_org_id, kind, filter_zero):
    if kind == "space":
        org_rank = relative_space_rank_for_org(space_or_org_id, filter_zero=filter_zero)
    elif kind == "model":
        org_rank = relative_model_rank_for_org(space_or_org_id, filter_zero=filter_zero)
    elif kind == "dataset":
        org_rank = relative_dataset_rank_for_org(
            space_or_org_id, filter_zero=filter_zero
        )
    result = (
        f"## ⭐️ Org/User {kind.title()} Likes Rankings ⭐️\n"
        + f"Here are the rankings for the org/user across all of their {kind}s \n"
    )
    result += f"""- You have {org_rank['value']:,} likes for this org/user.\n"""
    result += f"""- Your org/user is ranked {org_rank['position']:,}\n"""
    result += f"""- You have {org_rank['num_higher']:,} orgs/users above and {org_rank['num_lower']:,} orgs/users below in the ranking of {kind} likes \n\n"""
    result += f"""- Organization or user [{space_or_org_id}](https://huggingface.co/{space_or_org_id}) is ranked in the top {org_rank['rank']:.2f}% \n\n"""
    if kind == "space":
        result += f"""You can find all your Spaces sorted by likes [here](https://huggingface.co/{space_or_org_id}?sort_spaces=likes#spaces)\n"""
    if kind == "model":
        result += f"""You can find all your Models sorted by likes [here](https://huggingface.co/{space_or_org_id}?sort_models=likes#models)\n"""
    if kind == "dataset":
        result += f"""You can find all your Datasets sorted by likes [here](https://huggingface.co/{space_or_org_id}?sort_datasets=likes#datasets)\n"""
    return _create_footer_message(result, kind)


def _rank_single_repo(space_or_org_id, kind, filter_zero):
    if kind == "space":
        repo_rank = relative_rank_for_space(space_or_org_id, filter_zero=filter_zero)
    elif kind == "model":
        repo_rank = relative_rank_for_model(space_or_org_id, filter_zero=filter_zero)
    elif kind == "dataset":
        repo_rank = relative_rank_for_dataset(space_or_org_id, filter_zero=filter_zero)
    result = f"## ⭐️ {kind.title()} Likes Rankings ⭐️\n"
    result += f"""Here are the rankings by likes for [`{space_or_org_id}`](https://huggingface.co/spaces/{space_or_org_id}) across all {kind}s \n"""
    result += f"""- You have {repo_rank['value']:,} likes for this {kind}.\n"""
    result += f"""- Your {kind} is ranked {repo_rank['position']:,}.\n"""
    if kind == "space":
        result += f"""- Space [{space_or_org_id}](https://huggingface.co/spaces/{space_or_org_id}) is ranked {repo_rank['rank']:.2f}%\n"""
    if kind == "model":
        result += f"""- Model [{space_or_org_id}](https://huggingface.co/{space_or_org_id}) is ranked {repo_rank['rank']:.2f}%\n"""
    if kind == "dataset":
        result += f"""- Dataset [{space_or_org_id}](https://huggingface.co/datasets/{space_or_org_id}) is ranked {repo_rank['rank']:.2f}%\n"""
    result += f"""- You have {repo_rank['num_higher']:,} {kind}s above and {repo_rank['num_lower']:,} {kind}s below in the ranking of {kind} likes\n\n"""
    return _create_footer_message(result, kind)


def _create_footer_message(result, kind):
    result += """### ✨ Remember likes aren't everything! ✨\n"""
    if kind == "space":
        result += """Some Spaces go very viral whilst other Spaces may be very useful for a smaller audience.
If you think your Space is useful, please add it to this [thread](https://huggingface.co/spaces/librarian-bots/ranker/discussions/3) of awesome Spaces.
We'll look out for awesome Spaces added to this thread to promote more widely!"""
    return result


def get_top_n_orgs_and_users_spaces(top_n=100):
    # gr.Info("Updating leaderboard, this may take a few seconds...")
    orgs_to_likes = create_org_to_space_like_dict()
    sorted_items = sorted(orgs_to_likes.items(), key=lambda item: item[1], reverse=True)
    sorted_items = sorted_items[:top_n]
    return sorted_items


def get_top_n_orgs_and_users_models(metric, top_n=100):
    # gr.Info("Updating leaderboard, this may take a few seconds...")
    orgs_to_likes = create_org_to_model_metrics(metric)
    sorted_items = sorted(orgs_to_likes.items(), key=lambda item: item[1], reverse=True)
    sorted_items = sorted_items[:top_n]
    return sorted_items


def get_top_n_orgs_and_users_datasets(metric, top_n=100):
    # gr.Info("Updating leaderboard, this may take a few seconds...")
    orgs_to_likes = create_org_to_dataset_metrics(metric)
    sorted_items = sorted(orgs_to_likes.items(), key=lambda item: item[1], reverse=True)
    sorted_items = sorted_items[:top_n]
    return sorted_items


def plot_top_n_orgs_and_users(kind, metric="likes", top_n=100):
    if kind == "space":
        top_n = get_top_n_orgs_and_users_spaces(top_n)
        header = """## 🏅 Top 100 Orgs and Users by Space Likes 🏅"""
        body = "".join(
            f"\n{i+1}. [{org}](https://huggingface.co/{org}) with {likes:,} likes"
            for i, (org, likes) in enumerate(top_n)
        )
        return header + body
    elif kind == "model":
        top_n = get_top_n_orgs_and_users_models(metric, top_n=top_n)
        header = """## 🏅 Top 100 Orgs and Users by Model Likes 🏅"""
        body = "".join(
            f"\n{i+1}. [{org}](https://huggingface.co/{org}) with {likes:,} likes"
            for i, (org, likes) in enumerate(top_n)
        )
        return header + body
    elif kind == "dataset":
        top_n = get_top_n_orgs_and_users_datasets(metric, top_n=top_n)
        header = """## 🏅 Top 100 Orgs and Users by Dataset Likes 🏅"""
        body = "".join(
            f"\n{i+1}. [{org}](https://huggingface.co/{org}) with {likes:,} likes"
            for i, (org, likes) in enumerate(top_n)
        )
        return header + body


def get_top_n_spaces(top_n=100):
    # gr.Info("Updating leaderboard, this may take a few seconds...")
    space_to_likes = create_space_to_like_dict()
    sorted_items = sorted(
        space_to_likes.items(), key=lambda item: item[1], reverse=True
    )
    sorted_items = sorted_items[:top_n]
    return sorted_items


@cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
def get_top_n_models(metric_kind, top_n=100):
    # gr.Info("Updating leaderboard, this may take a few seconds...")
    model_to_likes = create_model_to_like_dict(metric_kind)
    sorted_items = sorted(
        model_to_likes.items(), key=lambda item: item[1], reverse=True
    )
    sorted_items = sorted_items[:top_n]
    return sorted_items


@cached(cache=TTLCache(maxsize=100, ttl=60 * 30))
def get_top_n_datasets(metric, top_n=100):
    # gr.Info("Updating leaderboard, this may take a few seconds...")
    dataset_to_likes = create_dataset_to_like_dict(metric)
    sorted_items = sorted(
        dataset_to_likes.items(), key=lambda item: item[1], reverse=True
    )
    sorted_items = sorted_items[:top_n]
    return sorted_items


def _plot_top_n_hub_repos(kind: HubRepoType, metric="likes", top_n=100):
    if kind == HubRepoType.SPACE:
        top_n = get_top_n_spaces(top_n)
        header = """## 🏅 Top 100 Space repositories by Likes 🏅"""
        body = "".join(
            f"\n{i+1}. [{space}](https://huggingface.co/spaces/{space}) with"
            f" {likes:,} likes"
            for i, (space, likes) in enumerate(top_n)
        )
        return header + body
    elif kind == HubRepoType.MODEL:
        top_n = get_top_n_models(metric, top_n)
        header = """## 🏅 Top 100 Model repositories by Likes 🏅"""
        body = "".join(
            f"\n{i+1}. "
            f"[{model}](https://huggingface.co/{model}) with"
            f" {likes:,} likes"
            for i, (model, likes) in enumerate(top_n)
        )
        return header + body
    elif kind == HubRepoType.DATASET:
        top_n = get_top_n_datasets(metric, top_n)
        header = """## 🏅 Top 100 Dataset repositories by Likes 🏅"""
        body = "".join(
            f"\n{i+1}. [{dataset}](https://huggingface.co/datasets/{dataset}) with"
            f" {likes:,} likes"
            for i, (dataset, likes) in enumerate(top_n)
        )
        return header + body


def plot_top_n_hub_repos(kind, metric_kind="likes", top_n=100):
    if kind == "space":
        return _plot_top_n_hub_repos(HubRepoType.SPACE, top_n=top_n)
    elif kind == "model":
        return _plot_top_n_hub_repos(HubRepoType.MODEL, metric=metric_kind, top_n=top_n)
    elif kind == "dataset":
        return _plot_top_n_hub_repos(
            HubRepoType.DATASET, metric=metric_kind, top_n=top_n
        )


with gr.Blocks() as demo:
    gr.HTML("<h1 style='text-align: center;'>🏆 HuggyRanker 🏆</h1>")
    gr.HTML(
        """<p style="text-align: center;">Rank a single repository or all of the repositories created by an organization or user by likes</p>"""
    )
    gr.HTML(
        """<p style="text-align: center;"><i>Remember likes aren't everything!</i></p>"""
    )
    gr.Markdown(
        """## Rank Specific Hub repositories or rank an organization or user by likes

Provide this app with a Hub ID e.g. `librarian-bots/ranker` or a Username/Organization
name e.g. `librarian-bots` to rank by likes."""
    )
    with gr.Row():
        space_id = gr.Textbox(
            "librarian-bots", max_lines=1, label="Space or user/organization ID"
        )
        filter_zero_likes = gr.Radio(
            choices=["no", "yes"],
            label="Filter out repositories with 0 likes in the ranking?",
            value="yes",
        )
        repo_type = gr.Radio(
            choices=["space", "model", "dataset"],
            label="Type of repo",
            value="space",
            interactive=True,
        )
    run_btn = gr.Button("Show ranking for this Space or org/user!", label="Rank Space")
    result = gr.Markdown()
    run_btn.click(
        rank_space_and_org,
        inputs=[space_id, repo_type, filter_zero_likes],
        outputs=result,
    )
    gr.Markdown("## Leaderboard of Top 100 Spaces and Orgs/Users by Likes")
    gr.Markdown(
        """The leaderboard is updated every 30 minutes.
Choose the type of repo to rank by likes and click the button to show the leaderboard."""
    )
    show_refresh_btn = gr.Button("Show/refresh Leaderboard", label="Refresh")
    with gr.Row():
        with gr.Accordion("Show rankings for Orgs and Users", open=False):
            org_user_ranking = gr.Markdown()
            show_refresh_btn.click(
                plot_top_n_orgs_and_users, inputs=[repo_type], outputs=org_user_ranking
            )
        with gr.Accordion("Show rankings for individual repositories", open=False):
            repo_level_ranking = gr.Markdown()
            show_refresh_btn.click(
                plot_top_n_hub_repos, inputs=[repo_type], outputs=repo_level_ranking
            )

demo.queue(concurrency_count=4).launch()