"""Streamlit app that plots usage of HF Hub models created in 2021 and 2022.

For each creation year the app reads ``./assets/<year>/model_init_time.csv``
(one row per model, a ``Model`` column, and one column per timestamp), lets the
user pick a popularity band, and draws one line per model whose mean usage
falls inside that band.
"""

### LIBRARIES ###
# NOTE(review): `label`, `font` and `width` are not referenced anywhere in the
# visible code and look like accidental IDE auto-imports; they are kept here
# only so this edit does not remove anything another part of the project
# might rely on. Confirm and delete.
from cProfile import label
from tkinter import font
from turtle import width
import streamlit as st
import pandas as pd
from datetime import datetime
import plotly.express as px

# Format of the timestamp column headers in the usage CSV files.
_TIMESTAMP_FORMAT = '%Y-%m-%dT%H:%M:%S.%fZ'

# Mean-usage thresholds per creation year: band name -> (lower, upper).
# Every band is half-open, ``lower <= mean < upper``, so a model whose mean
# sits exactly on a boundary belongs to exactly one band. ``None`` means the
# band has no upper limit. Models below the 'Low' lower bound are never shown.
_POPULARITY_BANDS = {
    2021: {
        'Low': (3500, 5000),
        'Moderate': (5000, 40000),
        'High': (40000, None),
    },
    2022: {
        'Low': (300, 500),
        'Moderate': (500, 1500),
        'High': (1500, None),
    },
}


def read_file_to_df(file):
    """Read the CSV at *file* and return it as a pandas DataFrame."""
    return pd.read_csv(file)


def _month_day(timestamp):
    """Return ``'<month>/<day>'`` (no zero padding) for one timestamp string."""
    parsed = datetime.strptime(timestamp, _TIMESTAMP_FORMAT)
    return f"{parsed.month}/{parsed.day}"


def date_range(df):
    """Return the index labels of *df* rewritten as ``'<month>/<day>'`` strings.

    Each index label must be a string in the ``%Y-%m-%dT%H:%M:%S.%fZ`` format,
    e.g. ``'2022-07-04T00:00:00.000Z'`` -> ``'7/4'``. The result has one entry
    per index label, in index order.

    Raises:
        ValueError: if a label does not match the expected format.
    """
    # Each label is parsed once (the previous version parsed it twice).
    return [_month_day(stamp) for stamp in df.index.to_list()]


def _load_usage_by_week(csv_path):
    """Load a usage CSV and return it with dates as rows and models as columns."""
    raw = read_file_to_df(csv_path).fillna(0)  # missing usage counts as 0
    usage = raw.set_index('Model').T
    usage.index = date_range(usage)
    return usage


def _select_by_popularity(usage, lower, upper=None):
    """Keep the model columns whose mean usage lies in ``[lower, upper)``.

    ``upper=None`` leaves the band open-ended. Column order is preserved.
    """
    mean_usage = usage.mean(axis=0)  # computed once, reused for both bounds
    in_band = mean_usage >= lower
    if upper is not None:
        in_band &= mean_usage < upper
    # Positional boolean mask: independent of label alignment.
    return usage.loc[:, in_band.to_numpy()]


def _render_year_plot(year, popularity):
    """Draw the usage line chart for models created in *year*."""
    usage = _load_usage_by_week(f"./assets/{year}/model_init_time.csv")
    lower, upper = _POPULARITY_BANDS[year][popularity]
    selected = _select_by_popularity(usage, lower, upper)
    fig = px.line(
        selected,
        title=f"Models created in {year}",
        labels={"index": "Weeks", "value": "Usage", "variable": "Model"},
    )
    st.plotly_chart(fig, use_container_width=True)


def main():
    """Configure the page, show the help text and render both yearly plots."""
    ### STREAMLIT APP CONFIG ###
    st.set_page_config(layout="wide", page_title="HF Hub Model Usage Visualization")
    st.header("Model Usage Visualization")

    with st.expander("How to read and interact with the plot:"):
        st.markdown("The plots below visualize model usage for HF models created in mid 2021 (top) vs. models created in mid 2022 (bottom). Note the y-axis range is different for each plot.")
        st.markdown("The plots are categorized based on model popularity. I first created a histogram of weekly mean usage across all models and then grouped them into these categorizes so that the plots are easier to read.")
        st.markdown("The plots are interactive. Hover over the points to see the model name and the number of weekly mean usage. Click on the legend to hide/show the models.")

    # 'High' (index 2) is the default selection.
    popularity = st.radio("Model popularity", ('Low', 'Moderate', 'High'), key="popularity", index=2, horizontal=True)

    # One container per creation year, 2021 on top and 2022 below.
    for year in (2021, 2022):
        with st.container():
            _render_year_plot(year, popularity)


if __name__ == "__main__":
    main()