IAMJB committed on
Commit
064d8d7
·
1 Parent(s): f74b9fc

leaderboards

Browse files
app.py CHANGED
@@ -4,6 +4,7 @@ from gradio_calendar import Calendar
4
  from datetime import datetime, timedelta
5
  from typing import Union, List
6
  from author_leaderboard_tab import author_leaderboard_tab
 
7
 
8
  # Initialize the PaperCentral class instance
9
  paper_central_df = PaperCentral()
@@ -16,6 +17,7 @@ with gr.Blocks(css="style.css") as demo:
16
  with gr.Tab("Paper-central", id="tab-paper-central"):
17
  with gr.Accordion(label="⭐Release notes", open=False):
18
  gr.Markdown("""
 
19
  - 8/10/2024 - MICCAI proceedings added
20
  - 7/10/2024 - COLM2024 proceedings added
21
  - 4/10/2024 - You can now filter by Title
@@ -109,7 +111,11 @@ with gr.Blocks(css="style.css") as demo:
109
  )
110
 
111
  with gr.Tab("Leaderboards", id="tab-leaderboards"):
112
- author_leaderboard_tab()
 
 
 
 
113
 
114
 
115
  # Define function to move to the next day
 
4
  from datetime import datetime, timedelta
5
  from typing import Union, List
6
  from author_leaderboard_tab import author_leaderboard_tab
7
+ from author_leaderboard_contrib_tab import author_resource_leaderboard_tab
8
 
9
  # Initialize the PaperCentral class instance
10
  paper_central_df = PaperCentral()
 
17
  with gr.Tab("Paper-central", id="tab-paper-central"):
18
  with gr.Accordion(label="⭐Release notes", open=False):
19
  gr.Markdown("""
20
+ - 11/10/2024 - Leaderboards added
21
  - 8/10/2024 - MICCAI proceedings added
22
  - 7/10/2024 - COLM2024 proceedings added
23
  - 4/10/2024 - You can now filter by Title
 
111
  )
112
 
113
  with gr.Tab("Leaderboards", id="tab-leaderboards"):
114
+ with gr.Tab("Authors"):
115
+ author_leaderboard_tab()
116
+ with gr.Tab("Contributors"):
117
+ author_resource_leaderboard_tab()
118
+
119
 
120
 
121
  # Define function to move to the next day
author_leaderboard_contrib_tab.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from df.author_leaderboard_contrib import AuthorLeaderboardContrib
3
+
4
+
5
def author_resource_leaderboard_tab():
    """
    Build the "Contributors" leaderboard UI: intro text, filter controls and the table.

    Creates Gradio components, so it must be called while a Gradio layout
    context is open (app.py calls it inside ``with gr.Tab("Contributors")``).
    Both filter controls re-query the leaderboard on every change and push
    the filtered result into the table.
    """
    # Loads and prettifies the contributor dataset once, when the tab is built.
    board = AuthorLeaderboardContrib()

    # One Gradio datatype per displayed column, in display order.
    column_datatypes = [board.DATATYPES[name] for name in board.COLUMNS_ORDER]

    with gr.Row():
        gr.Markdown(
            """
            ## Contributors Leaderboard
            The leaderboard centers on **artifact creators** who have developed models, datasets, or spaces associated with papers, regardless of whether they authored the original papers. It ranks contributors based on the total number of artifacts they've created that are linked to papers, as well as metrics like likes and downloads.
            """
        )

    with gr.Row():
        author_search_input = gr.Textbox(
            label="Search by Contributor Name",
            placeholder="Enter author name...",
            lines=1,
        )
        entity_type_filter = gr.Radio(
            label="Entity Type",
            choices=['All', 'user', 'org'],
            value='All',
        )

    with gr.Row():
        leaderboard_component = gr.Dataframe(
            label="Leaderboard",
            value=board.df_prettified,
            datatype=column_datatypes,
            row_count=(0, "dynamic"),
            interactive=False,
            max_height=1000,
            wrap=True,
        )

    # Keep the handler's name stable: Gradio may derive endpoint names from it.
    def update_leaderboard(author_name, entity_type):
        return board.filter(author_name, entity_type)

    controls = [author_search_input, entity_type_filter]

    # Either control changing re-runs the same filter with both current values.
    for control in controls:
        control.change(
            update_leaderboard,
            inputs=controls,
            outputs=[leaderboard_component],
        )
author_leaderboard_tab.py CHANGED
@@ -6,7 +6,11 @@ def author_leaderboard_tab():
6
  leaderboard = AuthorLeaderboard()
7
 
8
  with gr.Row():
9
- gr.Markdown("## Author Leaderboard")
 
 
 
 
10
  with gr.Row():
11
  author_search_input = gr.Textbox(
12
  label="Search by Author Name",
 
6
  leaderboard = AuthorLeaderboard()
7
 
8
  with gr.Row():
9
+ gr.Markdown(
10
+ """
11
+ ## Authors Leaderboard
12
+ The leaderboard focuses on <b>paper authors</b> whose works have attracted artifacts (models, datasets, spaces) from the community, even if they did not create these artifacts themselves. It ranks authors based on the total number of artifacts associated with their papers, along with other metrics like the number of papers they have, upvotes, comments, and average artifacts per paper.
13
+ """)
14
  with gr.Row():
15
  author_search_input = gr.Textbox(
16
  label="Search by Author Name",
df/author_leaderboard.py CHANGED
@@ -58,7 +58,7 @@ class AuthorLeaderboard:
58
  pd.DataFrame: The processed DataFrame.
59
  """
60
  # Load the dataset from the Hugging Face Hub
61
- dataset = load_dataset('IAMJB/paper-central-leaderboard', split='leaderboard')
62
  df = dataset.to_pandas()
63
 
64
  # Calculate total artifacts
 
58
  pd.DataFrame: The processed DataFrame.
59
  """
60
  # Load the dataset from the Hugging Face Hub
61
+ dataset = load_dataset('IAMJB/paper-central-leaderboard', split='train')
62
  df = dataset.to_pandas()
63
 
64
  # Calculate total artifacts
df/author_leaderboard_contrib.py ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from typing import List, Dict, Optional
3
+ import gradio as gr
4
+ from datasets import load_dataset
5
+
6
class AuthorLeaderboardContrib:
    """
    Manage and process the contributor ("author resource") leaderboard data
    for display in a Gradio Dataframe component.

    On construction the dataset is loaded once (``df_raw``) and a ranked,
    display-ready copy is derived from it (``df_prettified``). ``filter``
    then narrows the display copy without recomputing ranks, so a row keeps
    its global rank while a search or entity filter is active.
    """

    # Class-level constants defining columns and their data types
    COLUMNS_ORDER: List[str] = [
        'Rank',
        'Author',
        'Entity Type',
        'Total Artifacts',
        'Total Papers',
        'Total Models',
        'Total Datasets',
        'Total Spaces',
        'Likes',
        'Downloads',
    ]

    # Gradio Dataframe datatype per display column. Numeric columns use
    # 'number': Gradio documents "str", "number", "bool", "date" and
    # "markdown" as the valid values, and 'int' is not among them.
    DATATYPES: Dict[str, str] = {
        'Rank': 'str',
        'Author': 'markdown',
        'Entity Type': 'str',
        'Total Artifacts': 'number',
        'Total Papers': 'number',
        'Total Models': 'number',
        'Total Datasets': 'number',
        'Total Spaces': 'number',
        'Likes': 'number',
        'Downloads': 'number',
    }

    # Medal prefix for the top three ranks.
    EMOTICONS = {
        1: '🥇',
        2: '🥈',
        3: '🥉'
    }

    def __init__(self):
        """
        Initialize the AuthorLeaderboardContrib class by loading and processing the dataset.
        """
        self.df_raw: pd.DataFrame = self.get_df()
        self.df_prettified: pd.DataFrame = self.prettify(self.df_raw)

    @staticmethod
    def get_df() -> pd.DataFrame:
        """
        Load and process the leaderboard dataset.

        Rows whose ``entity_type`` is ``'unknown'`` are dropped and the raw
        column names are replaced by their display names.

        Returns:
            pd.DataFrame: The processed DataFrame.
        """
        # Load the dataset from the Hugging Face Hub
        dataset = load_dataset('IAMJB/paper-central-leaderboard-contrib', split='train')
        df = dataset.to_pandas()

        # Exclude entries with 'entity_type' == 'unknown'
        known = df[df['entity_type'] != 'unknown']

        # Rename columns for clarity. A non-inplace rename returns a new
        # frame, avoiding an in-place mutation of a filtered slice
        # (SettingWithCopy hazard).
        return known.rename(columns={
            'author': 'Author',
            'entity_type': 'Entity Type',
            'total_artifacts': 'Total Artifacts',
            'total_papers': 'Total Papers',
            'total_models': 'Total Models',
            'total_datasets': 'Total Datasets',
            'total_spaces': 'Total Spaces',
            'likes': 'Likes',
            'downloads': 'Downloads',
        })

    def prettify(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Prettify the DataFrame by adding rankings, emoticons, and markdown links.

        The input frame is not modified.

        Args:
            df (pd.DataFrame): The DataFrame to prettify. Must contain every
                column of ``COLUMNS_ORDER`` except 'Rank'.

        Returns:
            pd.DataFrame: The prettified DataFrame, indexed from 1 and
            restricted to ``COLUMNS_ORDER``.
        """
        # Sort by Total Artifacts descending. A stable sort keeps tied rows
        # in their incoming order, so ranks are reproducible between runs.
        ranked = df.sort_values(
            by='Total Artifacts', ascending=False, kind='mergesort'
        ).reset_index(drop=True)

        # 1-based index doubles as the rank
        ranked.index = pd.RangeIndex(1, len(ranked) + 1)

        # Add Rank column, with emoticons for the top 3 ranks
        ranked['Rank'] = [
            f"{self.EMOTICONS[position]} {position}" if position in self.EMOTICONS else f"{position}"
            for position in ranked.index
        ]

        # Convert 'Author' to markdown with profile links. Built row by row
        # instead of DataFrame.apply(axis=1), which does not return a Series
        # for an empty frame.
        ranked['Author'] = [self._create_author_link(row) for _, row in ranked.iterrows()]

        # Select columns to display
        return ranked[self.COLUMNS_ORDER]

    def _create_author_link(self, row: pd.Series) -> str:
        """
        Create a markdown link for the author's profile.

        Args:
            row (pd.Series): A row from the DataFrame; only ``row['Author']`` is read.

        Returns:
            str: The markdown link for the author.
        """
        author = row['Author']
        profile_url = f"https://huggingface.co/{author}"
        return f"[{author}]({profile_url})"

    @staticmethod
    def _link_text(cell: object) -> str:
        """Return the visible text of a ``[text](url)`` markdown cell, or the cell as a string."""
        text = str(cell)
        if text.startswith('[') and '](' in text:
            return text[1:text.index('](')]
        return text

    def _apply_filters(
        self,
        author_search_input: Optional[str] = None,
        entity_type_filter: Optional[str] = 'All'
    ) -> pd.DataFrame:
        """Return a filtered copy of ``df_prettified`` (pure pandas, no Gradio involved)."""
        result = self.df_prettified

        needle = (author_search_input or '').strip().lower()
        if needle:
            # Plain substring match against the visible name only: the search
            # text is user input, so it must not be interpreted as a regular
            # expression, and it must not match the URL part of the link.
            keep = pd.Series(
                [needle in self._link_text(cell).lower() for cell in result['Author']],
                index=result.index,
                dtype=bool,
            )
            result = result[keep]

        # Filter by entity type; None or '' is treated like 'All'
        if entity_type_filter and entity_type_filter != 'All':
            result = result[result['Entity Type'] == entity_type_filter]

        return result.copy()

    def filter(
        self,
        author_search_input: Optional[str] = None,
        entity_type_filter: Optional[str] = 'All'
    ) -> Dict:
        """
        Filter the DataFrame based on the author search input and entity type.

        Args:
            author_search_input (Optional[str]): Case-insensitive substring of the
                author name to search for; empty or None disables the search.
            entity_type_filter (Optional[str]): The entity type to filter by
                ('All', 'user', 'org'); None is treated as 'All'.

        Returns:
            The value returned by ``gr.update`` carrying the filtered DataFrame
            and its per-column datatypes, for the Gradio Dataframe component.
        """
        filtered_df: pd.DataFrame = self._apply_filters(author_search_input, entity_type_filter)

        # Get the corresponding data types for the columns
        datatypes: List[str] = [self.DATATYPES.get(col, 'str') for col in filtered_df.columns]

        return gr.update(value=filtered_df, datatype=datatypes)