Spaces:

huggingface
/

paper-central

Running

App Files Files Community

IAMJB committed on Oct 11, 2024

Commit

bc87bb9

1 Parent(s): e4eebea

Leaderboard

Browse files

Files changed (5) hide show

README.md +1 -1
app.py +94 -87
author_leaderboard_tab.py +32 -0
df/PaperCentral.py +1 -1
df/author_leaderboard.py +152 -0

README.md CHANGED Viewed

@@ -5,7 +5,7 @@ emoji: ⚡
 colorFrom: red
 colorTo: purple
 sdk: gradio
-sdk_version: 4.44.0
 app_file: app.py
 pinned: false
 header: mini

 colorFrom: red
 colorTo: purple
 sdk: gradio
+sdk_version: 5.0.2
 app_file: app.py
 pinned: false
 header: mini

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ from df.PaperCentral import PaperCentral
 from gradio_calendar import Calendar
 from datetime import datetime, timedelta
 from typing import Union, List
 # Initialize the PaperCentral class instance
 paper_central_df = PaperCentral()
@@ -11,97 +12,103 @@ paper_central_df = PaperCentral()
 with gr.Blocks(css="style.css") as demo:
     gr.Markdown("# Paper Central")
-    with gr.Accordion(label="⭐Release notes", open=False):
-        gr.Markdown("""
-        4/10/2024 - You can now filter by Title
-        """)
-    # Create a row for navigation buttons and calendar
-    with gr.Row():
-        with gr.Column(scale=1):
-            # Define the 'Next Day' and 'Previous Day' buttons
-            next_day_btn = gr.Button("Next Day")
-            prev_day_btn = gr.Button("Previous Day")
-        with gr.Column(scale=4):
-            # Define the calendar component for date selection
-            calendar = Calendar(
-                type="datetime",
-                label="Select a date",
-                info="Click the calendar icon to bring up the calendar.",
-                value=datetime.today().strftime('%Y-%m-%d')  # Default to today's date
-            )
-    # Create a row for Hugging Face options and Conference options
-    with gr.Row():
-        with gr.Column():
-            # Define the checkbox group for Hugging Face options
-            cat_options = gr.CheckboxGroup(
-                label="Category",
-                choices=[
-                    'cs.*',
-                    'eess.*',
-                    'econ.*',
-                    'math.*',
-                    'astro-ph.*',
-                    'cond-mat.*',
-                    'gr-qc',
-                    'hep-ex',
-                    'hep-lat',
-                    'hep-ph',
-                    'hep-th',
-                    'math-ph',
-                    'nlin.*',
-                    'nucl-ex',
-                    'nucl-th',
-                    'physics.*',
-                    'quant-ph',
-                    'q-bio.*',
-                    'q-fin.*',
-                    'stat.*',
-                ],
-                value=["cs.*"]
-            )
-            hf_options = gr.CheckboxGroup(
-                label="Hugging Face options",
-                choices=["🤗 paper-page", "datasets", "models", "spaces", "github"],
-                value=[],
-                elem_id="hf_options"
-            )
-        with gr.Column():
-            # Define the checkbox group for Conference options
-            conference_options = gr.CheckboxGroup(
-                label="Conference options",
-                choices=["In proceedings"] + PaperCentral.CONFERENCES
-            )
-            with gr.Row():
-                # Define a Textbox for author search
-                author_search = gr.Textbox(
-                    label="Search Authors",
-                    placeholder="Enter author name",
                 )
-                title_search = gr.Textbox(
-                    label="Search Title",
-                    placeholder="Enter keywords",
                 )
-    # Define the Dataframe component to display paper data
-    # List of columns in your DataFrame
-    columns = paper_central_df.COLUMNS_START_PAPER_PAGE
-    paper_central_component = gr.Dataframe(
-        label="Paper Data",
-        value=paper_central_df.df_prettified[columns],
-        datatype=[
-            paper_central_df.DATATYPES[column]
-            for column in columns
-        ],
-        row_count=(0, "dynamic"),
-        interactive=False,
-        height=1000,
-        elem_id="table",
-        wrap=True,
-    )
     # Define function to move to the next day

 from gradio_calendar import Calendar
 from datetime import datetime, timedelta
 from typing import Union, List
+from author_leaderboard_tab import author_leaderboard_tab
 # Initialize the PaperCentral class instance
 paper_central_df = PaperCentral()
 with gr.Blocks(css="style.css") as demo:
     gr.Markdown("# Paper Central")
+    with gr.Tab("Paper-central"):
+        with gr.Accordion(label="⭐Release notes", open=False):
+            gr.Markdown("""
+            - 8/10/2024 - MICCAI proceedings added
+            - 7/10/2024 - COLM2024 proceedings added
+            - 4/10/2024 - You can now filter by Title
+            """)
+        # Create a row for navigation buttons and calendar
+        with gr.Row():
+            with gr.Column(scale=1):
+                # Define the 'Next Day' and 'Previous Day' buttons
+                next_day_btn = gr.Button("Next Day")
+                prev_day_btn = gr.Button("Previous Day")
+            with gr.Column(scale=4):
+                # Define the calendar component for date selection
+                calendar = Calendar(
+                    type="datetime",
+                    label="Select a date",
+                    info="Click the calendar icon to bring up the calendar.",
+                    value=datetime.today().strftime('%Y-%m-%d')  # Default to today's date
+                )
+        # Create a row for Hugging Face options and Conference options
+        with gr.Row():
+            with gr.Column():
+                # Define the checkbox group for Hugging Face options
+                cat_options = gr.CheckboxGroup(
+                    label="Category",
+                    choices=[
+                        'cs.*',
+                        'eess.*',
+                        'econ.*',
+                        'math.*',
+                        'astro-ph.*',
+                        'cond-mat.*',
+                        'gr-qc',
+                        'hep-ex',
+                        'hep-lat',
+                        'hep-ph',
+                        'hep-th',
+                        'math-ph',
+                        'nlin.*',
+                        'nucl-ex',
+                        'nucl-th',
+                        'physics.*',
+                        'quant-ph',
+                        'q-bio.*',
+                        'q-fin.*',
+                        'stat.*',
+                    ],
+                    value=["cs.*"]
                 )
+                hf_options = gr.CheckboxGroup(
+                    label="Hugging Face options",
+                    choices=["🤗 artifacts", "datasets", "models", "spaces", "github"],
+                    value=[],
+                    elem_id="hf_options"
                 )
+            with gr.Column():
+                # Define the checkbox group for Conference options
+                conference_options = gr.CheckboxGroup(
+                    label="Conference options",
+                    choices=["In proceedings"] + PaperCentral.CONFERENCES
+                )
+                with gr.Row():
+                    # Define a Textbox for author search
+                    author_search = gr.Textbox(
+                        label="Search Authors",
+                        placeholder="Enter author name",
+                    )
+                    title_search = gr.Textbox(
+                        label="Search Title",
+                        placeholder="Enter keywords",
+                    )
+        # Define the Dataframe component to display paper data
+        # List of columns in your DataFrame
+        columns = paper_central_df.COLUMNS_START_PAPER_PAGE
+        paper_central_component = gr.Dataframe(
+            label="Paper Data",
+            value=paper_central_df.df_prettified[columns],
+            datatype=[
+                paper_central_df.DATATYPES[column]
+                for column in columns
+            ],
+            row_count=(0, "dynamic"),
+            interactive=False,
+            max_height=1000,
+            elem_id="table",
+            wrap=True,
+        )
+    with gr.Tab("Leaderboard"):
+        author_leaderboard_tab()
     # Define function to move to the next day

author_leaderboard_tab.py ADDED Viewed

	@@ -0,0 +1,32 @@

+import gradio as gr
+from df.author_leaderboard import AuthorLeaderboard
+def author_leaderboard_tab():
+    """
+    Build the contents of the author leaderboard tab.
+    Creates a heading, an author search box and a read-only Dataframe, then wires the search box
+    so that every text change re-filters the table through ``AuthorLeaderboard.filter``.
+    Intended to be called inside an open Gradio layout context (app.py calls it within
+    ``gr.Tab("Leaderboard")``).
+    """
+    # Load and prettify the leaderboard data once, when the tab is built
+    board = AuthorLeaderboard()
+    # One Gradio datatype per displayed column, in display order
+    column_types = [board.DATATYPES[name] for name in board.COLUMNS_ORDER]
+    # Heading
+    with gr.Row():
+        gr.Markdown("## Author Leaderboard")
+    # Search box
+    with gr.Row():
+        search_box = gr.Textbox(
+            label="Search by Author Name",
+            placeholder="Enter author name...",
+            lines=1,
+        )
+    # Leaderboard table
+    with gr.Row():
+        table = gr.Dataframe(
+            label="Leaderboard",
+            value=board.df_prettified,
+            datatype=column_types,
+            row_count=(0, "dynamic"),
+            interactive=False,
+            max_height=1000,
+            wrap=True,
+        )
+    # Re-filter the table whenever the search text changes
+    search_box.change(
+        fn=board.filter,
+        inputs=[search_box],
+        outputs=[table],
+    )

df/PaperCentral.py CHANGED Viewed

@@ -356,7 +356,7 @@ class PaperCentral:
         # HF options
         if hf_options:
-            if "🤗 paper-page" in hf_options:
                 # Filter rows where 'paper_page' is not empty or NaN
                 filtered_df = filtered_df[
                     (filtered_df['paper_page'] != "") & (filtered_df['paper_page'].notna())

         # HF options
         if hf_options:
+            if "🤗 artifacts" in hf_options:
                 # Filter rows where 'paper_page' is not empty or NaN
                 filtered_df = filtered_df[
                     (filtered_df['paper_page'] != "") & (filtered_df['paper_page'].notna())

df/author_leaderboard.py ADDED Viewed

	@@ -0,0 +1,152 @@

+import pandas as pd
+from typing import List, Dict, Optional
+import gradio as gr
+from datasets import load_dataset
+import numpy as np
+class AuthorLeaderboard:
+    """
+    Load, rank and filter the author leaderboard displayed in a Gradio Dataframe component.
+    ``df_raw`` holds the table as loaded (with derived and renamed columns); ``df_prettified``
+    holds the ranked, display-ready view restricted to ``COLUMNS_ORDER``.
+    """
+    # Columns shown in the UI, in display order
+    COLUMNS_ORDER: List[str] = [
+        'Rank',
+        'Author',
+        'Total Artifacts',
+        'Avg Artifacts per Paper',
+        'Total Papers',
+        'Total Models',
+        'Total Datasets',
+        'Total Spaces',
+        'Upvotes',
+        'Comments',
+    ]
+    # Gradio Dataframe datatype for each displayed column
+    DATATYPES: Dict[str, str] = {
+        'Rank': 'str',
+        'Author': 'markdown',
+        'Total Artifacts': 'int',
+        'Avg Artifacts per Paper': 'float',
+        'Total Papers': 'int',
+        'Total Models': 'int',
+        'Total Datasets': 'int',
+        'Total Spaces': 'int',
+        'Upvotes': 'int',
+        'Comments': 'int',
+    }
+    # Medal shown in front of the top three ranks
+    EMOTICONS = {
+        1: '🥇',
+        2: '🥈',
+        3: '🥉'
+    }
+    def __init__(self):
+        """
+        Initialize the AuthorLeaderboard class by loading and processing the dataset.
+        """
+        self.df_raw: pd.DataFrame = self.get_df()
+        self.df_prettified: pd.DataFrame = self.prettify(self.df_raw)
+    @staticmethod
+    def get_df() -> pd.DataFrame:
+        """
+        Load the leaderboard dataset and add the derived columns.
+        Returns:
+            pd.DataFrame: The loaded table with 'Total Artifacts' and 'Avg Artifacts per Paper'
+            added and the source columns renamed to their display names.
+        """
+        # Load the dataset from the Hugging Face Hub
+        dataset = load_dataset('IAMJB/paper-central-leaderboard', split='leaderboard')
+        df = dataset.to_pandas()
+        # Calculate total artifacts
+        df['Total Artifacts'] = df['num_models'] + df['num_datasets'] + df['num_spaces']
+        # Calculate average artifacts per paper, rounded to two decimals
+        # NOTE(review): assumes num_papers > 0 for every row; a zero would yield inf/NaN - confirm
+        df['Avg Artifacts per Paper'] = (df['Total Artifacts'] / df['num_papers']).round(2)
+        # Rename columns for clarity
+        df.rename(columns={
+            'name': 'Author',
+            'num_papers': 'Total Papers',
+            'num_models': 'Total Models',
+            'num_datasets': 'Total Datasets',
+            'num_spaces': 'Total Spaces',
+            'upvotes': 'Upvotes',
+            'num_comments': 'Comments',
+        }, inplace=True)
+        return df
+    def prettify(self, df: pd.DataFrame) -> pd.DataFrame:
+        """
+        Prettify the DataFrame by adding rankings, emoticons, and markdown links.
+        Args:
+            df (pd.DataFrame): The DataFrame to prettify. It is not modified.
+        Returns:
+            pd.DataFrame: A new DataFrame sorted by 'Total Artifacts' (descending), indexed from 1,
+            and restricted to ``COLUMNS_ORDER``.
+        """
+        df = df.copy()
+        # Sort authors by Total Artifacts descending
+        df.sort_values(by='Total Artifacts', ascending=False, inplace=True)
+        # Reset index to get ranks
+        df.reset_index(drop=True, inplace=True)
+        df.index += 1  # Start ranks from 1
+        # Add Rank column as text, with a medal in front of the top 3
+        df['Rank'] = [self._format_rank(rank) for rank in df.index]
+        # Convert 'Author' to markdown with profile links if 'username' is available.
+        # Built row by row (not df.apply) so an empty table still yields a plain column.
+        df['Author'] = [self._create_author_link(row) for _, row in df.iterrows()]
+        # Select columns to display
+        df = df[self.COLUMNS_ORDER]
+        return df
+    def _format_rank(self, rank: int) -> str:
+        """
+        Render a rank as text, prefixed with a medal for the ranks listed in ``EMOTICONS``.
+        """
+        medal = self.EMOTICONS.get(rank)
+        return f"{medal} {rank}" if medal else f"{rank}"
+    def _create_author_link(self, row: pd.Series) -> str:
+        """
+        Create a markdown link for the author's profile.
+        Args:
+            row (pd.Series): A row from the DataFrame.
+        Returns:
+            str: ``[Author](https://huggingface.co/<username>)`` when the row has a non-empty
+            'username', otherwise the plain author name.
+        """
+        username = row.get('username')
+        if pd.notna(username) and username:
+            profile_url = f"https://huggingface.co/{username}"
+            return f"[{row['Author']}]({profile_url})"
+        return row['Author']
+    def _filter_by_author(self, author_search_input: Optional[str] = None) -> pd.DataFrame:
+        """
+        Return the rows of ``df_prettified`` whose author display name contains the query.
+        The match is a case-insensitive literal substring test. An empty or missing query returns
+        every row.
+        """
+        filtered_df: pd.DataFrame = self.df_prettified.copy()
+        if not author_search_input:
+            return filtered_df
+        search_string = author_search_input.lower()
+        # Strip the "[name](url)" markdown wrapper so the profile URL (which contains
+        # "huggingface.co" and the username) never takes part in the match
+        display_names = filtered_df['Author'].str.replace(r'^\[(.*)\]\(.*\)$', r'\1', regex=True)
+        # regex=False: the query is user input, so metacharacters such as "(" must be literal;
+        # na=False: rows without an author name simply do not match
+        matches = display_names.str.lower().str.contains(search_string, regex=False, na=False)
+        return filtered_df[matches]
+    def filter(self, author_search_input: Optional[str] = None) -> dict:
+        """
+        Filter the DataFrame based on the author search input.
+        Args:
+            author_search_input (Optional[str]): The author name to search for.
+        Returns:
+            dict: The update payload built by ``gr.update`` for the Gradio Dataframe component.
+        """
+        filtered_df = self._filter_by_author(author_search_input)
+        # Get the corresponding data types for the columns
+        datatypes: List[str] = [self.DATATYPES.get(col, 'str') for col in filtered_df.columns]
+        return gr.update(value=filtered_df, datatype=datatypes)