File size: 4,100 Bytes
7c7fb6e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fd4b9f6
 
 
 
7c7fb6e
8d98bdc
 
7c7fb6e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8d98bdc
 
fd4b9f6
7c7fb6e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import gradio as gr


import pandas as pd
import json
from datasets import load_dataset
import requests
from huggingface_hub import list_datasets, list_models, list_spaces
from collections import Counter
import numpy as np


def compute_ranking(df, column, method="sum", keep="last"):
    """Aggregate *column* per author and attach a "top percent" label.

    The frame is grouped by its ``"author"`` column, *column* is aggregated
    with *method*, and the per-author results are sorted ascending and
    renumbered from 0. Each row then receives ``top_perc``: the string
    ``100 * (1 - position / row_count)`` formatted with two decimals.
    Rows sharing the same aggregated value are collapsed to one, chosen
    by *keep* (passed to ``DataFrame.drop_duplicates``).

    Returns a DataFrame with the columns ``"value"`` (the aggregated
    *column*) and ``"top_perc"``.
    """
    per_author = df.groupby("author").aggregate({column: method})[[column]]
    ranked = per_author.sort_values(by=column).reset_index(drop=True)
    n_authors = len(ranked)

    def _top_percentage(row):
        # row.name is the 0-based position in the ascending ranking.
        return f"{100 * (1-(row.name/n_authors)):.2f}"

    ranked["top_perc"] = ranked.apply(_top_percentage, axis=1)
    deduplicated = ranked.drop_duplicates(subset=column, keep=keep)
    return deduplicated.rename(columns={column: "value"})

class NpEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy scalars and arrays to built-in types."""

    def default(self, obj):
        """Return a JSON-serializable stand-in for *obj*.

        NumPy arrays become (nested) lists, NumPy floating scalars become
        ``float`` and NumPy integer scalars become ``int``. Anything else
        is delegated to ``json.JSONEncoder.default``.
        """
        if isinstance(obj, np.ndarray):
            converted = obj.tolist()
        elif isinstance(obj, np.floating):
            converted = float(obj)
        elif isinstance(obj, np.integer):
            converted = int(obj)
        else:
            converted = super().default(obj)
        return converted

# Per-repository statistics for model repos, loaded once at start-up.
ds = load_dataset("open-source-metrics/model-repos-stats", split="train")
df = ds.to_pandas()

# Per-author rankings, keyed by the metric they describe.
df_ranks = {
    "likes": compute_ranking(df, "likes"),
    "downloads": compute_ranking(df, "downloads_30d"),
    "repos": compute_ranking(df, "repo_id", method="count"),
}

# HTML report skeleton; its {{placeholder}} markers are filled in per user.
with open("./html_template.html", "r") as f:
    template = f.read()

def _most_common_or_na(values):
    """Return the most frequent item of *values*, or ``"n/a"`` when it is empty."""
    ranked = Counter(values).most_common(1)
    return ranked[0][0] if ranked else "n/a"


def _top_percentage(ranking, total):
    """Return ``top_perc`` of the first row of *ranking* whose value is >= *total*.

    Falls back to ``"n/a"`` when no row qualifies instead of raising.
    """
    at_or_above = ranking[ranking["value"] >= total]
    return at_or_above.iloc[0]["top_perc"] if len(at_or_above) else "n/a"


def _count_given_likes(user_name):
    """Return how many likes the Hub reports for *user_name*, or ``"n/a"`` on failure."""
    # Function-scope import: only the standard library is added to this block.
    from urllib.parse import quote

    # Escape the name so it cannot alter the path of the request.
    url = f"https://huggingface.co/api/users/{quote(user_name, safe='')}/likes"
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        payload = response.json()
    except (requests.RequestException, ValueError):
        return "n/a"
    # Only a JSON list is countable as likes; any other payload must not be
    # measured with len().
    return len(payload) if isinstance(payload, list) else "n/a"


def create_user_summary(user_name):
    """Build the HTML report for one Hugging Face user.

    Args:
        user_name: Hub user name as typed by the visitor; surrounding
            whitespace is ignored.

    Returns:
        The filled-in report produced by ``dict_to_html``, or a short HTML
        notice when the user has no rows in the model statistics frame.

    Statistics that cannot be determined (no Spaces, no known model type,
    likes request failed) are reported as ``"n/a"`` rather than raising.
    """
    user_name = (user_name or "").strip()
    df_user = df.loc[df["author"] == user_name]

    if len(df_user) == 0:
        return """<br><p style="text-align: center;color: rgb(255, 210, 31);font-family: 'Consolas', monospace; font-size: 24px;">Unfortunately there is not enough data for your report.</p><br>"""

    user_datasets = list(list_datasets(author=user_name))
    user_spaces = list(list_spaces(author=user_name))

    # Sort once per metric and reuse the top row for both the id and the count.
    most_liked = df_user.sort_values(by="likes", ascending=False).iloc[0]
    most_downloaded = df_user.sort_values(by="downloads_30d", ascending=False).iloc[0]

    summary = {}

    summary["likes_user_total"] = df_user["likes"].sum()
    summary["likes_user_given"] = _count_given_likes(user_name)
    summary["likes_user_top"] = _top_percentage(df_ranks["likes"], summary["likes_user_total"])
    summary["likes_repo_most"] = most_liked["repo_id"]
    summary["likes_repo_most_n"] = most_liked["likes"]

    summary["downloads_user_total"] = df_user["downloads_30d"].sum()
    summary["downloads_user_top"] = _top_percentage(df_ranks["downloads"], summary["downloads_user_total"])
    # NOTE(review): the "downlods" spelling presumably matches placeholders in
    # html_template.html - keep it unless the template is changed too.
    summary["downlods_repo_most"] = most_downloaded["repo_id"]
    summary["downlods_repo_most_n"] = most_downloaded["downloads_30d"]

    summary["repos_model_total"] = len(df_user)
    summary["repos_model_top"] = _top_percentage(df_ranks["repos"], summary["repos_model_total"])
    # Every model_type may be missing, so the counter can be empty.
    summary["repos_model_fav_type"] = _most_common_or_na(df_user["model_type"].dropna())

    summary["repos_datasets_total"] = len(user_datasets)
    summary["repos_spaces_total"] = len(user_spaces)
    # A user may own no Spaces at all, and an sdk attribute may be None.
    summary["repos_spaces_fav_sdk"] = _most_common_or_na(
        [getattr(info, "sdk", None) or "n/a" for info in user_spaces]
    )

    return dict_to_html(summary)


def dict_to_html(summary, report_template=None):
    """Fill the ``{{key}}`` placeholders of an HTML template from *summary*.

    Args:
        summary: Mapping of placeholder name to value. Each value is
            converted with ``str`` and HTML-escaped before insertion, so
            text taken from repository metadata cannot inject markup.
        report_template: Template text to fill. Defaults to the
            module-level ``template``.

    Returns:
        The template with every ``{{key}}`` occurrence replaced.
        Placeholders without a matching key are left untouched.
    """
    # Function-scope import: only the standard library is added to this block.
    from html import escape

    report = template if report_template is None else report_template

    for key, value in summary.items():
        report = report.replace("{{" + key + "}}", escape(str(value)))
    return report


# Single-page UI: a prompt, a user-name box, a button, and the rendered report.
with gr.Blocks(css=".gradio-container {background-color: #000000}") as demo:
    with gr.Row():
        gr.HTML(
            value="""<p style="text-align: center; color: rgb(255, 210, 31); font-family: 'Consolas', monospace; font-size: 24px;"> <b>Enter your HF user name:</b></p>"""
        )
    with gr.Row():
        username = gr.Textbox(lines=1, max_lines=1, label="User name")
    with gr.Row():
        run = gr.Button()
    with gr.Row():
        output = gr.HTML(label="Generated code")
    # Clicking the button renders the report for the typed user name.
    event = run.click(fn=create_user_summary, inputs=[username], outputs=output)

demo.launch()