"""Gradio leaderboard for models tagged for the VSD benchmark on the Hugging Face Hub.

At import time the script lists Hub models carrying ``benchmark_tag``, reads
the ``model-index`` metadata from each model's README, merges those results
with the rows stored in ``paper_models.csv`` and renders one table per
supported task.
"""
import gradio as gr
from huggingface_hub import HfApi, hf_hub_download
from huggingface_hub.repocard import metadata_load
import pandas as pd

benchmark_user = 'vsd-benchmark'
fashion_dataset = f'{benchmark_user}/vsd-fashion'
benchmark_tag = 'vsd'

hf_api = HfApi()
models = list(hf_api.list_models(filter=benchmark_tag))

SUPPORTED_TASKS = [
    'in_catalog_retrieval_zero_shot',
    'in_catalog_open_catalog',
    'in_catalog_closed_catalog',
    'consumer-catalog_wild_zero_shot',
]

# Columns whose cells are produced by create_model_link and therefore must be
# rendered as Markdown rather than as plain strings.
LINK_COLUMNS = ('model', 'dataset')

print("Tagged models", models)


def create_model_link(model_id, link=None, type='repos'):
    """Return a Markdown link labelled ``model_id``.

    Args:
        model_id: Hub identifier used both as the link label and, when
            ``link`` is not given, as the trailing part of the URL.
        link: Explicit target URL. When ``None`` the URL is built from
            ``type`` and ``model_id`` under ``https://huggingface.co``.
        type: URL section to link into. ``'repos'`` (the default) and
            ``None`` add no section; any other value, such as
            ``'datasets'``, is inserted as ``/<type>`` before the id.

    Returns:
        A string of the form ``[model_id](url)``.
    """
    if link is None:
        type_url_part = ''
        if type is not None and type != 'repos':
            type_url_part = f"/{type}"
        link = f"https://huggingface.co{type_url_part}/{model_id}"
    # The URL has to be part of the returned text, otherwise the table shows
    # a bare identifier instead of a clickable link.
    return f'[{model_id}]({link})'


def get_model_results(model_meta):
    """Extract the benchmark results recorded in a model card's metadata.

    Only results whose dataset type starts with ``benchmark_user`` and whose
    dataset config is one of ``SUPPORTED_TASKS`` are kept.

    Args:
        model_meta: Mapping loaded from a model card's metadata, or ``None``
            when the card has no metadata.

    Returns:
        A list of dicts, one per matching result, holding ``dataset``,
        ``task`` and one entry per metric name. The list is empty when the
        metadata is missing or contains no matching result.
    """
    if not model_meta:
        return []

    metrics_meta = []
    # "or []" also covers keys that are present but explicitly set to null.
    for index in model_meta.get('model-index') or []:
        for result in index.get('results') or []:
            dataset = result.get('dataset') or {}
            dataset_type = dataset.get('type') or ''
            task = dataset.get('config')
            if dataset_type.split('/')[0] != benchmark_user:
                continue
            if task not in SUPPORTED_TASKS:
                continue
            # Metrics lacking a name or a value cannot become a column, so
            # they are skipped instead of aborting the whole leaderboard.
            metrics_dict = {
                metric['name']: metric['value']
                for metric in result.get('metrics') or []
                if 'name' in metric and 'value' in metric
            }
            metrics_meta.append(dict(dataset=dataset_type, task=task, **metrics_dict))
    return metrics_meta


def _column_datatypes(frame):
    """Return one Gradio datatype string per column of ``frame``.

    Link columns map to ``'markdown'``, boolean columns to ``'bool'``, other
    numeric columns to ``'number'`` and everything else to ``'str'``.
    """
    datatypes = []
    for column, dtype in frame.dtypes.items():
        if column in LINK_COLUMNS:
            datatypes.append('markdown')
        elif pd.api.types.is_bool_dtype(dtype):
            datatypes.append('bool')
        elif pd.api.types.is_numeric_dtype(dtype):
            datatypes.append('number')
        else:
            datatypes.append('str')
    return datatypes


# Collect one row per (model, supported task) from the tagged Hub models.
results = []
for model in models:
    readme_path = hf_hub_download(model.modelId, filename="README.md")
    meta = metadata_load(readme_path)
    model_link = create_model_link(model.modelId)
    results.extend(dict(model=model_link, **result) for result in get_model_results(meta))

# Rows shipped with the app; 'source' records where each row came from.
paper_models_df = pd.read_csv('./paper_models.csv', index_col=0)
paper_models_df['source'] = 'VSD Paper'

results_df = pd.DataFrame(results)
results_df['source'] = 'HuggingFace'

df = pd.concat([results_df, paper_models_df])
print(df)

block = gr.Blocks()
with block:
    for task in SUPPORTED_TASKS:
        group = df[df['task'] == task]
        if group.empty:
            continue
        gr.Markdown(f"## Task - {task}")
        group = group.sort_values('ROC_AUC', ascending=False)
        # assign() builds a new frame, so the filtered slice of df is never
        # written to in place.
        group = group.assign(
            dataset=group['dataset'].apply(lambda x: create_model_link(x, type="datasets"))
        )
        group = group.reset_index(drop=True)
        gr.DataFrame(
            group,
            # One datatype per column, derived from the column itself so the
            # list stays correct whatever the column order is.
            datatype=_column_datatypes(group),
            wrap=True,
        )

block.launch()