Author: Julien Simon 

In [None]:
import huggingface_hub
import pandas as pd

### Retrieve metadata on all public models

In [None]:
# list_models may return a lazy iterable; materialize it so the listing can be
# indexed (models[0]) and iterated more than once by the cells below.
models = list(huggingface_hub.list_models(full=True))

In [None]:
# Display the first entry of the model listing.
models[0]

In [None]:
# Fetch the metadata of the 'distilgpt2' repository, asking for its security status as well.
huggingface_hub.model_info('distilgpt2', securityStatus=True)

In [None]:
# Start from an empty frame that already carries the three columns of interest.
models_df = pd.DataFrame(
    columns=[
        'model_name',
        'task_type',
        'downloads',
    ],
)

In [None]:
# Build the frame with a single constructor call: DataFrame.append was removed
# in pandas 2.0, and appending one row at a time re-copies the whole frame on
# every iteration.
models_df = pd.DataFrame(
    [
        {
            'model_name': m.modelId,
            'task_type': m.pipeline_tag,
            # A missing or None download count is recorded as 0.
            'downloads': getattr(m, 'downloads', 0) or 0,
        }
        for m in models
    ],
    # Keep the column layout even when the listing is empty.
    columns=['model_name', 'task_type', 'downloads'],
)

In [None]:
# Preview the first rows of the assembled frame.
models_df.head()

### List task types

In [None]:
# Collect the distinct values of the 'task_type' column, then show them
# followed by how many there are.
task_types = models_df.loc[:, 'task_type'].unique()
print(task_types, len(task_types), sep='\n')

### For each task type, find out the percentage of downloads that the top 'n' models represent

In [None]:
# Number of models per task type that the per-task cells below select.
n = 20

In [None]:
# For every task type, print the share of that task's total downloads that is
# held by its n most-downloaded models.
for t in task_types:
    if t is None:
        # Models without a task type are not reported.
        continue
    task_models_df = models_df[models_df['task_type'] == t]
    all_downloads = task_models_df['downloads'].sum()
    if all_downloads == 0:
        # Nothing to report, and the ratio below would divide by zero.
        continue
    # Rank by downloads before taking n rows: the frame is in listing order,
    # so a plain [:n] slice would not pick the most-downloaded models.
    topn_downloads = (
        task_models_df['downloads'].sort_values(ascending=False).head(n).sum()
    )
    print('{} ({} models): {:.1%}'.format(t, len(task_models_df), topn_downloads / all_downloads))

### For each task type, list the repository of the top 'n' models

In [None]:
BASE_URL = 'https://huggingface.co'

# For every task type, print the repository URL of its n most-downloaded models.
for t in task_types:
    if t is None:
        # Models without a task type are not reported.
        continue
    task_models_df = models_df[models_df['task_type'] == t]
    # Rank by downloads first; head(n) simply returns every row when the task
    # has fewer than n models, so no explicit length check is needed.
    topn_models = task_models_df.sort_values('downloads', ascending=False).head(n)
    print('[{}]'.format(t))
    for model_name in topn_models['model_name']:
        print('{}/{}'.format(BASE_URL, model_name))