Spaces:

huggingface
/

paper-central

Running

App Files Files Community

IAMJB committed on Oct 11, 2024

Commit

064d8d7

1 Parent(s): f74b9fc

leaderboards

Browse files

Files changed (5) hide show

app.py +7 -1
author_leaderboard_contrib_tab.py +55 -0
author_leaderboard_tab.py +5 -1
df/author_leaderboard.py +1 -1
df/author_leaderboard_contrib.py +156 -0

app.py CHANGED Viewed

@@ -4,6 +4,7 @@ from gradio_calendar import Calendar
 from datetime import datetime, timedelta
 from typing import Union, List
 from author_leaderboard_tab import author_leaderboard_tab
 # Initialize the PaperCentral class instance
 paper_central_df = PaperCentral()
@@ -16,6 +17,7 @@ with gr.Blocks(css="style.css") as demo:
         with gr.Tab("Paper-central", id="tab-paper-central"):
             with gr.Accordion(label="⭐Release notes", open=False):
                 gr.Markdown("""
                 - 8/10/2024 - MICCAI proceedings added
                 - 7/10/2024 - COLM2024 proceedings added
                 - 4/10/2024 - You can now filter by Title
@@ -109,7 +111,11 @@ with gr.Blocks(css="style.css") as demo:
             )
         with gr.Tab("Leaderboards", id="tab-leaderboards"):
-            author_leaderboard_tab()
     # Define function to move to the next day

 from datetime import datetime, timedelta
 from typing import Union, List
 from author_leaderboard_tab import author_leaderboard_tab
+from author_leaderboard_contrib_tab import author_resource_leaderboard_tab
 # Initialize the PaperCentral class instance
 paper_central_df = PaperCentral()
         with gr.Tab("Paper-central", id="tab-paper-central"):
             with gr.Accordion(label="⭐Release notes", open=False):
                 gr.Markdown("""
+                - 11/10/2024 - Leaderboards added
                 - 8/10/2024 - MICCAI proceedings added
                 - 7/10/2024 - COLM2024 proceedings added
                 - 4/10/2024 - You can now filter by Title
             )
         with gr.Tab("Leaderboards", id="tab-leaderboards"):
+            with gr.Tab("Authors"):
+                author_leaderboard_tab()
+            with gr.Tab("Contributors"):
+                author_resource_leaderboard_tab()
     # Define function to move to the next day

author_leaderboard_contrib_tab.py ADDED Viewed

	@@ -0,0 +1,55 @@

+import gradio as gr
+from df.author_leaderboard_contrib import AuthorLeaderboardContrib
+def author_resource_leaderboard_tab():
+    # Initialize the AuthorLeaderboardContrib class
+    leaderboard = AuthorLeaderboardContrib()
+    with gr.Row():
+        gr.Markdown(
+            """
+            ## Contributors Leaderboard
+            The leaderboard centers on **artifact creators** who have developed models, datasets, or spaces associated with papers, regardless of whether they authored the original papers. It ranks contributors based on the total number of artifacts they've created that are linked to papers, as well as metrics like likes and downloads.
+            """
+        )
+    with gr.Row():
+        author_search_input = gr.Textbox(
+            label="Search by Contributor Name",
+            placeholder="Enter author name...",
+            lines=1,
+        )
+        entity_type_filter = gr.Radio(
+            label="Entity Type",
+            choices=['All', 'user', 'org'],
+            value='All',
+        )
+    with gr.Row():
+        leaderboard_component = gr.Dataframe(
+            label="Leaderboard",
+            value=leaderboard.df_prettified,
+            datatype=[leaderboard.DATATYPES[column] for column in leaderboard.COLUMNS_ORDER],
+            row_count=(0, "dynamic"),
+            interactive=False,
+            max_height=1000,
+            wrap=True,
+        )
+    # Define the interaction function
+    def update_leaderboard(author_name, entity_type):
+        return leaderboard.filter(author_name, entity_type)
+    inputs = [author_search_input, entity_type_filter]
+    outputs = [leaderboard_component]
+    # Set up the interactions
+    author_search_input.change(
+        update_leaderboard,
+        inputs=inputs,
+        outputs=outputs
+    )
+    entity_type_filter.change(
+        update_leaderboard,
+        inputs=inputs,
+        outputs=outputs
+    )

author_leaderboard_tab.py CHANGED Viewed

@@ -6,7 +6,11 @@ def author_leaderboard_tab():
     leaderboard = AuthorLeaderboard()
     with gr.Row():
-        gr.Markdown("## Author Leaderboard")
     with gr.Row():
         author_search_input = gr.Textbox(
             label="Search by Author Name",

     leaderboard = AuthorLeaderboard()
     with gr.Row():
+        gr.Markdown(
+            """
+             ## Authors Leaderboard
+             The leaderboard focuses on <b>paper authors</b> whose works have attracted artifacts (models, datasets, spaces) from the community, even if they did not create these artifacts themselves. It ranks authors based on the total number of artifacts associated with their papers, along with other metrics like the number of papers they have, upvotes, comments, and average artifacts per paper.
+             """)
     with gr.Row():
         author_search_input = gr.Textbox(
             label="Search by Author Name",

df/author_leaderboard.py CHANGED Viewed

@@ -58,7 +58,7 @@ class AuthorLeaderboard:
             pd.DataFrame: The processed DataFrame.
         """
         # Load the dataset from the Hugging Face Hub
-        dataset = load_dataset('IAMJB/paper-central-leaderboard', split='leaderboard')
         df = dataset.to_pandas()
         # Calculate total artifacts

             pd.DataFrame: The processed DataFrame.
         """
         # Load the dataset from the Hugging Face Hub
+        dataset = load_dataset('IAMJB/paper-central-leaderboard', split='train')
         df = dataset.to_pandas()
         # Calculate total artifacts

df/author_leaderboard_contrib.py ADDED Viewed

	@@ -0,0 +1,156 @@

+import pandas as pd
+from typing import List, Dict, Optional
+import gradio as gr
+from datasets import load_dataset
+class AuthorLeaderboardContrib:
+    """
+    A class to manage and process author resource leaderboard data for display in a Gradio Dataframe component.
+    """
+    # Class-level constants defining columns and their data types
+    COLUMNS_ORDER: List[str] = [
+        'Rank',
+        'Author',
+        'Entity Type',
+        'Total Artifacts',
+        'Total Papers',
+        'Total Models',
+        'Total Datasets',
+        'Total Spaces',
+        'Likes',
+        'Downloads',
+    ]
+    DATATYPES: Dict[str, str] = {
+        'Rank': 'str',
+        'Author': 'markdown',
+        'Entity Type': 'str',
+        'Total Artifacts': 'int',
+        'Total Papers': 'int',
+        'Total Models': 'int',
+        'Total Datasets': 'int',
+        'Total Spaces': 'int',
+        'Likes': 'int',
+        'Downloads': 'int',
+    }
+    EMOTICONS = {
+        1: '🥇',
+        2: '🥈',
+        3: '🥉'
+    }
+    def __init__(self):
+        """
+        Initialize the AuthorLeaderboardContrib class by loading and processing the dataset.
+        """
+        self.df_raw: pd.DataFrame = self.get_df()
+        self.df_prettified: pd.DataFrame = self.prettify(self.df_raw)
+    @staticmethod
+    def get_df() -> pd.DataFrame:
+        """
+        Load and process the leaderboard dataset.
+        Returns:
+            pd.DataFrame: The processed DataFrame.
+        """
+        # Load the dataset from the Hugging Face Hub
+        dataset = load_dataset('IAMJB/paper-central-leaderboard-contrib', split='train')
+        df = dataset.to_pandas()
+        # Exclude entries with 'entity_type' == 'unknown'
+        df = df[df['entity_type'] != 'unknown']
+        # Rename columns for clarity
+        df.rename(columns={
+            'author': 'Author',
+            'entity_type': 'Entity Type',
+            'total_artifacts': 'Total Artifacts',
+            'total_papers': 'Total Papers',
+            'total_models': 'Total Models',
+            'total_datasets': 'Total Datasets',
+            'total_spaces': 'Total Spaces',
+            'likes': 'Likes',
+            'downloads': 'Downloads',
+        }, inplace=True)
+        return df
+    def prettify(self, df: pd.DataFrame) -> pd.DataFrame:
+        """
+        Prettify the DataFrame by adding rankings, emoticons, and markdown links.
+        Args:
+            df (pd.DataFrame): The DataFrame to prettify.
+        Returns:
+            pd.DataFrame: The prettified DataFrame.
+        """
+        df = df.copy()
+        # Sort authors by Total Artifacts descending
+        df.sort_values(by='Total Artifacts', ascending=False, inplace=True)
+        # Reset index to get ranks
+        df.reset_index(drop=True, inplace=True)
+        df.index += 1  # Start ranks from 1
+        # Add Rank column
+        df['Rank'] = df.index
+        # Add emoticons for top 3 ranks
+        df['Rank'] = df['Rank'].apply(lambda x: f"{self.EMOTICONS.get(x, '')} {x}" if x <= 3 else f"{x}")
+        # Convert 'Author' to markdown with profile links
+        df['Author'] = df.apply(self._create_author_link, axis=1)
+        # Select columns to display
+        df = df[self.COLUMNS_ORDER]
+        return df
+    def _create_author_link(self, row: pd.Series) -> str:
+        """
+        Create a markdown link for the author's profile.
+        Args:
+            row (pd.Series): A row from the DataFrame.
+        Returns:
+            str: The markdown link for the author.
+        """
+        author = row['Author']
+        profile_url = f"https://huggingface.co/{author}"
+        return f"[{author}]({profile_url})"
+    def filter(
+        self,
+        author_search_input: Optional[str] = None,
+        entity_type_filter: Optional[str] = 'All'
+    ) -> gr.update:
+        """
+        Filter the DataFrame based on the author search input and entity type.
+        Args:
+            author_search_input (Optional[str]): The author name to search for.
+            entity_type_filter (Optional[str]): The entity type to filter by ('All', 'user', 'org').
+        Returns:
+            gr.Update: An update object for the Gradio Dataframe component.
+        """
+        filtered_df: pd.DataFrame = self.df_prettified.copy()
+        if author_search_input:
+            search_string = author_search_input.lower()
+            filtered_df = filtered_df[filtered_df['Author'].str.lower().str.contains(search_string)]
+        # Filter by entity type
+        if entity_type_filter != 'All':
+            filtered_df = filtered_df[filtered_df['Entity Type'] == entity_type_filter]
+        # Get the corresponding data types for the columns
+        datatypes: List[str] = [self.DATATYPES.get(col, 'str') for col in filtered_df.columns]
+        return gr.update(value=filtered_df, datatype=datatypes)