|
import html

import numpy as np
import pandas as pd
import streamlit as st
|
|
|
# Page chrome: switch the app to the wide layout, then render the title and a
# pointer to the dataset that feeds the leaderboard.
st.set_page_config(layout="wide")

# The hugging-face emoji (U+1F917) is written as an escape sequence so the
# title survives a round trip through tools that mis-decode the file's bytes.
st.header("HuggingFace \U0001F917 Posts leaderboard")

st.write(
    """Data Source: https://huggingface.co/datasets/maxiw/hf-posts"""
)
|
|
|
@st.cache_data(ttl=3600)
def _load_posts():
    """Read the posts JSONL file and parse its ``publishedAt`` column.

    Streamlit re-executes the whole script on every widget interaction, so
    the result is cached (refreshed at most once per hour) instead of being
    downloaded and parsed again on each rerun.

    Returns:
        pandas.DataFrame: one row per line of the JSONL file, with
        ``publishedAt`` converted by ``pd.to_datetime``.
    """
    posts = pd.read_json("hf://datasets/maxiw/hf-posts/posts.jsonl", lines=True)
    posts["publishedAt"] = pd.to_datetime(posts["publishedAt"])
    return posts


# ``st.cache_data`` hands back a fresh copy on every call, so the column
# assignments performed further down never touch the cached frame.
df = _load_posts()

# Columns that are summed per author; the same list feeds the "Sort by" picker.
metrics = ["totalUniqueImpressions", "totalReactions", "numComments", "Num of posts"]

# Bounds of the date slider: earliest and latest publication timestamps,
# converted to standard-library datetimes.
min_date = df["publishedAt"].min().to_pydatetime()
max_date = df["publishedAt"].max().to_pydatetime()
|
|
|
|
|
# Two side-by-side containers with a 3:1 width ratio: the date slider goes in
# the first one, the metric picker in the second.
col1, col2 = st.columns([3, 1])

# Range slider whose initial selection is (min_date, max_date); the selected
# pair is stored in date_range.
date_range = col1.slider(
    "Select Date Range",
    min_value=min_date,
    max_value=max_date,
    value=(min_date, max_date),
    format="DD/MMM/YYYY",
)

# Metric used to order the leaderboard; the first entry of metrics is
# preselected.
selected_metric = col2.selectbox("Sort by:", options=metrics, index=0)
|
|
|
|
|
|
|
# Keep only the posts whose publication time lies between the two slider ends.
mask = df["publishedAt"].between(*date_range)
# Take an explicit copy: the column assignments below would otherwise write
# into a slice of the unfiltered frame (SettingWithCopyWarning).
df = df[mask].copy()

# Derive flat per-post columns from the nested ``author`` and ``reactions``
# fields so they can be grouped and summed.
df["Name"] = df["author"].apply(lambda author: author["fullname"])
df["username"] = df["author"].apply(lambda author: author["name"])
df["totalReactions"] = df["reactions"].apply(
    lambda reactions: sum(reaction["count"] for reaction in reactions)
)
# Each row is one post, so summing this constant yields the post count.
df["Num of posts"] = 1

# One row per (username, Name) pair with every metric summed, ordered by the
# metric chosen in the "Sort by" picker, highest first.
data = (
    df.groupby(["username", "Name"])[metrics]
    .sum()
    .sort_values(selected_metric, ascending=False)
    .reset_index()
)

# Replace the default 0-based index with a 1-based one labelled "Rank".
data.index = np.arange(1, len(data) + 1)
data.index.name = "Rank"
|
|
|
|
|
def make_clickable(val):
    """Return an HTML anchor that opens ``val``'s HuggingFace page in a new tab.

    The value is HTML-escaped before interpolation because the resulting
    markup is rendered as raw HTML; a value containing quotes or angle
    brackets can therefore neither break out of the ``href`` attribute nor
    inject tags. Values without HTML special characters are emitted unchanged.

    Args:
        val: The username to link to; it is converted with ``str`` first.

    Returns:
        str: ``<a target="_blank" href="https://huggingface.co/{val}">{val}</a>``
        with ``val`` escaped.
    """
    safe = html.escape(str(val), quote=True)
    return f'<a target="_blank" href="https://huggingface.co/{safe}">{safe}</a>'
|
|
|
|
|
# Format the username column through make_clickable so each cell becomes a
# link, then emit the styled table as raw HTML.
df_styled = data.style.format({"username": make_clickable})

table_html = df_styled.to_html(escape=False, index=False)
# The markup is prefixed with an opening <center> tag (no closing tag is
# emitted) and passed through with HTML rendering enabled.
st.write("<center>" + table_html, unsafe_allow_html=True)
|
|
|
|
|
|
|
|