# Source: Hugging Face Space "huggingface/paper-central" (status at capture: Running)
# File size: 4,839 Bytes; commit: 064d8d7

import pandas as pd
from typing import List, Dict, Optional
import gradio as gr
from datasets import load_dataset

class AuthorLeaderboardContrib:
    """
    Manage and process author resource leaderboard data for display in a Gradio Dataframe component.

    On construction the leaderboard dataset is loaded into ``df_raw`` and a
    display-ready version (ranked, with markdown author links) is derived
    into ``df_prettified``. ``filter`` narrows the display version by author
    name and entity type.
    """

    # Display columns, in the order they are shown
    COLUMNS_ORDER: List[str] = [
        'Rank',
        'Author',
        'Entity Type',
        'Total Artifacts',
        'Total Papers',
        'Total Models',
        'Total Datasets',
        'Total Spaces',
        'Likes',
        'Downloads',
    ]

    # Gradio datatype used for each display column
    DATATYPES: Dict[str, str] = {
        'Rank': 'str',
        'Author': 'markdown',
        'Entity Type': 'str',
        'Total Artifacts': 'int',
        'Total Papers': 'int',
        'Total Models': 'int',
        'Total Datasets': 'int',
        'Total Spaces': 'int',
        'Likes': 'int',
        'Downloads': 'int',
    }

    # Medal prefixed to the rank label of the top three rows
    EMOTICONS: Dict[int, str] = {
        1: '🥇',
        2: '🥈',
        3: '🥉',
    }

    def __init__(self):
        """
        Initialize the AuthorLeaderboardContrib class by loading and processing the dataset.
        """
        self.df_raw: pd.DataFrame = self.get_df()
        self.df_prettified: pd.DataFrame = self.prettify(self.df_raw)

    @staticmethod
    def get_df() -> pd.DataFrame:
        """
        Load the leaderboard dataset and normalise its column names.

        Rows whose ``entity_type`` is ``'unknown'`` are dropped, and the
        dataset's snake_case columns are renamed to the display names used
        in ``COLUMNS_ORDER``.

        Returns:
            pd.DataFrame: The processed DataFrame.
        """
        # Load the dataset from the Hugging Face Hub
        dataset = load_dataset('IAMJB/paper-central-leaderboard-contrib', split='train')
        df = dataset.to_pandas()

        # Exclude entries with 'entity_type' == 'unknown'
        df = df[df['entity_type'] != 'unknown']

        # Non-inplace rename: returns a new frame instead of mutating the
        # filtered selection above.
        return df.rename(columns={
            'author': 'Author',
            'entity_type': 'Entity Type',
            'total_artifacts': 'Total Artifacts',
            'total_papers': 'Total Papers',
            'total_models': 'Total Models',
            'total_datasets': 'Total Datasets',
            'total_spaces': 'Total Spaces',
            'likes': 'Likes',
            'downloads': 'Downloads',
        })

    def prettify(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Prettify the DataFrame by adding rankings, emoticons, and markdown links.

        The input frame is not modified.

        Args:
            df (pd.DataFrame): The DataFrame to prettify. Must contain every
                column of ``COLUMNS_ORDER`` except 'Rank'.

        Returns:
            pd.DataFrame: A new frame sorted by 'Total Artifacts' descending,
            indexed from 1, restricted to ``COLUMNS_ORDER``.
        """
        # sort_values/reset_index return new frames, so the caller's frame
        # is left untouched without an explicit copy.
        ranked = df.sort_values(by='Total Artifacts', ascending=False).reset_index(drop=True)
        ranked.index += 1  # Start ranks from 1

        # Rank label: medal + number for the top three, plain number otherwise
        ranked['Rank'] = [self._format_rank(position) for position in range(1, len(ranked) + 1)]

        # Convert 'Author' to markdown with profile links (column-wise map
        # instead of a row-wise DataFrame.apply)
        ranked['Author'] = ranked['Author'].map(self._format_author_link)

        # Select columns to display
        return ranked[self.COLUMNS_ORDER]

    @classmethod
    def _format_rank(cls, rank: int) -> str:
        """Return the display label for a 1-based rank (medal-prefixed for ranks in EMOTICONS)."""
        medal = cls.EMOTICONS.get(rank)
        return f"{medal} {rank}" if medal else f"{rank}"

    @staticmethod
    def _format_author_link(author) -> str:
        """Return a markdown link pointing at the author's Hugging Face profile."""
        profile_url = f"https://huggingface.co/{author}"
        return f"[{author}]({profile_url})"

    def _create_author_link(self, row: pd.Series) -> str:
        """
        Create a markdown link for the author's profile.

        Args:
            row (pd.Series): A row from the DataFrame; its 'Author' entry is used.

        Returns:
            str: The markdown link for the author.
        """
        return self._format_author_link(row['Author'])

    @staticmethod
    def _author_display_names(links: pd.Series) -> pd.Series:
        """
        Recover the visible author names from a column of markdown links.

        Values that are not of the form ``[name](url)`` are passed through
        unchanged.
        """
        labels = links.str.extract(r'^\[(.*)\]\(', expand=False)
        return labels.fillna(links)

    def _filter_dataframe(
        self,
        author_search_input: Optional[str] = None,
        entity_type_filter: Optional[str] = 'All'
    ) -> pd.DataFrame:
        """
        Return a filtered copy of ``df_prettified`` (no Gradio involved).

        Args:
            author_search_input (Optional[str]): Case-insensitive substring to
                look for in the author name. ``None``, empty, or
                whitespace-only means "no author filter".
            entity_type_filter (Optional[str]): Exact 'Entity Type' value to
                keep. ``None``, empty, or 'All' means "no entity filter".

        Returns:
            pd.DataFrame: The matching rows, as an independent copy.
        """
        frame = self.df_prettified

        needle = (author_search_input or '').strip().lower()
        if needle:
            # Match against the link label only, so the shared
            # "https://huggingface.co/" URL prefix cannot produce hits.
            names = self._author_display_names(frame['Author'])
            # regex=False: user input is a literal substring, not a pattern
            # (characters such as '(' or '[' must not raise or act as regex).
            mask = names.str.lower().str.contains(needle, regex=False, na=False)
            frame = frame[mask]

        # Filter by entity type
        if entity_type_filter and entity_type_filter != 'All':
            frame = frame[frame['Entity Type'] == entity_type_filter]

        return frame.copy()

    def filter(
        self,
        author_search_input: Optional[str] = None,
        entity_type_filter: Optional[str] = 'All'
    ) -> dict:
        """
        Filter the DataFrame based on the author search input and entity type.

        Args:
            author_search_input (Optional[str]): The author name to search for
                (case-insensitive literal substring of the author name).
            entity_type_filter (Optional[str]): The entity type to filter by
                ('All', 'user', 'org'). ``None`` is treated like 'All'.

        Returns:
            dict: The result of ``gr.update`` carrying the filtered frame and
            the per-column datatypes for the Gradio Dataframe component.
        """
        filtered_df: pd.DataFrame = self._filter_dataframe(author_search_input, entity_type_filter)

        # Get the corresponding data types for the columns
        datatypes: List[str] = [self.DATATYPES.get(col, 'str') for col in filtered_df.columns]

        return gr.update(value=filtered_df, datatype=datatypes)