# File size: 4,839 Bytes
# 064d8d7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import pandas as pd
from typing import List, Dict, Optional
import gradio as gr
from datasets import load_dataset

class AuthorLeaderboardContrib:
    """
    Manage the author contribution leaderboard shown in a Gradio Dataframe component.

    On construction the leaderboard dataset is loaded and cleaned (``df_raw``)
    and then converted into a display-ready table (``df_prettified``) that
    ``filter`` narrows down on demand.
    """

    # Order of the columns in the displayed table.
    COLUMNS_ORDER: List[str] = [
        'Rank',
        'Author',
        'Entity Type',
        'Total Artifacts',
        'Total Papers',
        'Total Models',
        'Total Datasets',
        'Total Spaces',
        'Likes',
        'Downloads',
    ]

    # Datatype passed to the Gradio Dataframe for each displayed column.
    # NOTE(review): Gradio documents 'number' (not 'int') as a Dataframe
    # datatype -- confirm that 'int' is rendered as intended before changing it.
    DATATYPES: Dict[str, str] = {
        'Rank': 'str',
        'Author': 'markdown',
        'Entity Type': 'str',
        'Total Artifacts': 'int',
        'Total Papers': 'int',
        'Total Models': 'int',
        'Total Datasets': 'int',
        'Total Spaces': 'int',
        'Likes': 'int',
        'Downloads': 'int',
    }

    # Medal shown in front of the three best ranks.
    EMOTICONS: Dict[int, str] = {
        1: '🥇',
        2: '🥈',
        3: '🥉'
    }

    # Dataset column name -> display column name.
    COLUMN_RENAMES: Dict[str, str] = {
        'author': 'Author',
        'entity_type': 'Entity Type',
        'total_artifacts': 'Total Artifacts',
        'total_papers': 'Total Papers',
        'total_models': 'Total Models',
        'total_datasets': 'Total Datasets',
        'total_spaces': 'Total Spaces',
        'likes': 'Likes',
        'downloads': 'Downloads',
    }

    # Captures the link text of a "[text](url)" markdown link.
    _LINK_TEXT_PATTERN: str = r'^\[(.*)\]\('

    def __init__(self):
        """
        Load the leaderboard dataset and build the display-ready table.
        """
        self.df_raw: pd.DataFrame = self.get_df()
        self.df_prettified: pd.DataFrame = self.prettify(self.df_raw)

    @staticmethod
    def get_df() -> pd.DataFrame:
        """
        Load the leaderboard dataset and normalise its column names.

        Rows whose ``entity_type`` is ``'unknown'`` are dropped and the
        remaining columns are renamed to their display names.

        Returns:
            pd.DataFrame: The cleaned leaderboard data.
        """
        # Load the dataset from the Hugging Face Hub
        dataset = load_dataset('IAMJB/paper-central-leaderboard-contrib', split='train')
        df = dataset.to_pandas()

        # Exclude entries with 'entity_type' == 'unknown'
        known = df[df['entity_type'] != 'unknown']

        # Rename out-of-place so the filtered slice is never mutated in place.
        return known.rename(columns=AuthorLeaderboardContrib.COLUMN_RENAMES)

    def prettify(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Build the display table: ranks, medals and markdown profile links.

        The input frame is not modified.

        Args:
            df (pd.DataFrame): Leaderboard data using the display column names.

        Returns:
            pd.DataFrame: Rows sorted by 'Total Artifacts' (descending), indexed
            from 1, restricted to ``COLUMNS_ORDER``.
        """
        # sort_values returns a new frame, so the caller's data stays untouched.
        ranked = df.sort_values(by='Total Artifacts', ascending=False)
        ranked = ranked.reset_index(drop=True)
        ranked.index += 1  # Start ranks from 1

        # Plain list comprehensions avoid building one Series per row
        # (as DataFrame.apply(axis=1) does) and also work on an empty frame.
        ranked['Rank'] = [self._format_rank(rank) for rank in ranked.index]
        ranked['Author'] = [self._author_markdown(name) for name in ranked['Author']]

        # Select columns to display
        return ranked[self.COLUMNS_ORDER]

    def _format_rank(self, rank: int) -> str:
        """Return the rank as text, prefixed with a medal when one is defined."""
        medal = self.EMOTICONS.get(rank)
        return f"{medal} {rank}" if medal else f"{rank}"

    @staticmethod
    def _author_markdown(author) -> str:
        """Return a markdown link pointing at the author's Hugging Face profile."""
        profile_url = f"https://huggingface.co/{author}"
        return f"[{author}]({profile_url})"

    def _create_author_link(self, row: pd.Series) -> str:
        """
        Create a markdown link for the author's profile.

        Args:
            row (pd.Series): A row from the DataFrame; only 'Author' is read.

        Returns:
            str: The markdown link for the author.
        """
        return self._author_markdown(row['Author'])

    def _filter_frame(
        self,
        author_search_input: Optional[str] = None,
        entity_type_filter: Optional[str] = 'All'
    ) -> pd.DataFrame:
        """
        Return the rows of ``df_prettified`` matching the given filters.

        Args:
            author_search_input (Optional[str]): Case-insensitive substring to
                look for in the author name; empty/None disables the search.
            entity_type_filter (Optional[str]): Exact 'Entity Type' value to
                keep; 'All', empty or None disables the filter.

        Returns:
            pd.DataFrame: A filtered copy of the display table.
        """
        table = self.df_prettified
        keep = pd.Series(True, index=table.index)

        needle = (author_search_input or '').strip().lower()
        if needle:
            # Match against the link text only: the 'Author' cells are markdown
            # links, so searching the whole cell would also hit the URL
            # (e.g. "huggingface" would match every row).
            link_text = table['Author'].str.extract(self._LINK_TEXT_PATTERN, expand=False)
            names = link_text.fillna(table['Author'])
            # regex=False: the query is user input, not a pattern, so
            # characters such as "(" or "[" must not raise.
            keep = keep & names.str.lower().str.contains(needle, regex=False, na=False)

        # Filter by entity type
        if entity_type_filter and entity_type_filter != 'All':
            keep = keep & (table['Entity Type'] == entity_type_filter)

        return table[keep].copy()

    def filter(
        self,
        author_search_input: Optional[str] = None,
        entity_type_filter: Optional[str] = 'All'
    ) -> dict:
        """
        Filter the DataFrame based on the author search input and entity type.

        Args:
            author_search_input (Optional[str]): The author name to search for.
            entity_type_filter (Optional[str]): The entity type to filter by ('All', 'user', 'org').

        Returns:
            The result of ``gr.update`` carrying the filtered table and the
            datatype of each of its columns, for the Gradio Dataframe component.
        """
        filtered_df: pd.DataFrame = self._filter_frame(author_search_input, entity_type_filter)

        # Get the corresponding data types for the columns
        datatypes: List[str] = [self.DATATYPES.get(col, 'str') for col in filtered_df.columns]

        return gr.update(value=filtered_df, datatype=datatypes)