"""Gradio app that renders a per-user summary of Hugging Face Hub activity.

At import time the module loads the ``open-source-metrics/model-repos-stats``
dataset, pre-computes per-author rankings for likes, 30-day downloads and
repository counts, and reads ``./html_template.html``.  The UI asks for a user
name and fills the ``{{key}}`` placeholders of the template with that user's
statistics.
"""
import json
from collections import Counter

import gradio as gr
import numpy as np
import pandas as pd
import requests
from datasets import load_dataset
from huggingface_hub import list_datasets, list_models, list_spaces

# Upper bound (seconds) on the likes lookup so one slow call cannot hang a request.
_REQUEST_TIMEOUT = 10

# Placeholder shown when a statistic cannot be determined.
_NOT_AVAILABLE = "n/a"


def compute_ranking(df, column, method="sum", keep="last"):
    """Build a per-author ranking table for one metric.

    Args:
        df: Frame with an ``author`` column and the metric ``column``.
        column: Name of the column to aggregate per author.
        method: Aggregation passed to pandas (e.g. ``"sum"``, ``"count"``).
        keep: Which row to keep among authors with equal aggregated values;
            forwarded to ``DataFrame.drop_duplicates``.

    Returns:
        A frame sorted ascending by the aggregated metric with two columns:
        ``value`` (the aggregated metric) and ``top_perc`` (a string with two
        decimals, ``100 * (1 - position / number_of_authors)``, where
        ``position`` is the author's 0-based place in the ascending order).
    """
    df_rank = df.groupby("author").aggregate({column: method})[[column]]
    df_rank = df_rank.sort_values(by=column).reset_index(drop=True)

    # After the index reset, the row label equals the row position, so the
    # percentages can be generated directly instead of via a row-wise apply.
    n_authors = len(df_rank)
    df_rank["top_perc"] = [
        f"{100 * (1 - position / n_authors):.2f}" for position in range(n_authors)
    ]

    # Authors with identical values collapse into a single row.
    df_rank = df_rank.drop_duplicates(subset=column, keep=keep)
    return df_rank.rename({column: "value"}, axis="columns")


class NpEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy scalars and arrays to Python types."""

    def default(self, obj):
        """Return a JSON-serializable equivalent of ``obj``."""
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)


ds = load_dataset("open-source-metrics/model-repos-stats", split="train")
df = ds.to_pandas()

df_ranks = {}
df_ranks["likes"] = compute_ranking(df, "likes")
df_ranks["downloads"] = compute_ranking(df, "downloads_30d")
df_ranks["repos"] = compute_ranking(df, "repo_id", method="count")

with open("./html_template.html", "r", encoding="utf-8") as f:
    template = f.read()


def _most_common(values, default=_NOT_AVAILABLE):
    """Return the most frequent item of ``values``, or ``default`` if empty."""
    counts = Counter(values)
    if not counts:
        return default
    return counts.most_common(1)[0][0]


def _top_percentile(metric, total):
    """Look up the ``top_perc`` of the first ranking row whose value >= ``total``."""
    ranking = df_ranks[metric]
    return ranking[ranking["value"] >= total].iloc[0]["top_perc"]


def _count_likes_given(user_name):
    """Return how many likes ``user_name`` has given, or ``"n/a"`` if unknown.

    The count is the length of the JSON list returned by the Hub likes
    endpoint.  Network failures, non-2xx responses, invalid JSON and non-list
    payloads all yield the placeholder instead of a misleading number.
    """
    try:
        response = requests.get(
            f"https://huggingface.co/api/users/{user_name}/likes",
            timeout=_REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        likes = response.json()
    except (requests.RequestException, ValueError):
        return _NOT_AVAILABLE
    # NOTE(review): assumes a successful response is a JSON list of likes.
    return len(likes) if isinstance(likes, list) else _NOT_AVAILABLE


def create_user_summary(user_name):
    """Render the HTML report for ``user_name``.

    Args:
        user_name: Hub user name as typed in the UI; surrounding whitespace
            is ignored.

    Returns:
        The filled-in HTML template, or a short notice when the dataset has
        no rows for that author.
    """
    user_name = (user_name or "").strip()
    df_user = df.loc[df["author"] == user_name]
    if df_user.empty:
        return """

Unfortunately there is not enough data for your report.


"""

    user_datasets = list(list_datasets(author=user_name))
    user_spaces = list(list_spaces(author=user_name))

    # Sort once per metric and reuse the top row for both the id and the count.
    most_liked = df_user.sort_values(by="likes", ascending=False).iloc[0]
    most_downloaded = df_user.sort_values(by="downloads_30d", ascending=False).iloc[0]

    summary = {}
    summary["likes_user_total"] = df_user["likes"].sum()
    summary["likes_user_given"] = _count_likes_given(user_name)
    summary["likes_user_top"] = _top_percentile("likes", summary["likes_user_total"])
    summary["likes_repo_most"] = most_liked["repo_id"]
    summary["likes_repo_most_n"] = most_liked["likes"]

    summary["downloads_user_total"] = df_user["downloads_30d"].sum()
    summary["downloads_user_top"] = _top_percentile(
        "downloads", summary["downloads_user_total"]
    )
    # The "downlods" spelling is kept on purpose: these keys have to match the
    # placeholders in the HTML template, which is not part of this file.
    summary["downlods_repo_most"] = most_downloaded["repo_id"]
    summary["downlods_repo_most_n"] = most_downloaded["downloads_30d"]

    summary["repos_model_total"] = len(df_user)
    summary["repos_model_top"] = _top_percentile("repos", summary["repos_model_total"])
    # Every model_type may be missing, so fall back instead of indexing an
    # empty most_common() result.
    summary["repos_model_fav_type"] = _most_common(df_user["model_type"].dropna())

    summary["repos_datasets_total"] = len(user_datasets)
    summary["repos_spaces_total"] = len(user_spaces)
    # Users without any Space get the placeholder rather than an IndexError;
    # a missing or empty sdk attribute is reported the same way.
    summary["repos_spaces_fav_sdk"] = _most_common(
        getattr(info, "sdk", None) or _NOT_AVAILABLE for info in user_spaces
    )

    return dict_to_html(summary)


def dict_to_html(summary):
    """Fill every ``{{key}}`` placeholder of the template with ``str(value)``."""
    report = template
    for key, value in summary.items():
        report = report.replace("{{" + key + "}}", str(value))
    return report


demo = gr.Blocks(
    css=".gradio-container {background-color: #000000}"
)

with demo:
    with gr.Row():
        gr.HTML(value="""

Enter your HF user name:

""")
    with gr.Row():
        username = gr.Textbox(lines=1, max_lines=1, label="User name")
    with gr.Row():
        run = gr.Button()
    with gr.Row():
        output = gr.HTML(label="Generated code")

    event = run.click(create_user_summary, [username], output)

demo.launch()