File size: 963 Bytes
761cd5d
7dfe065
 
 
 
 
 
 
 
 
761cd5d
c639c51
 
761cd5d
 
 
 
 
 
 
 
e5e2b84
761cd5d
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import pandas as pd
from huggingface_hub import snapshot_download


def my_snapshot_download(repo_id, revision, local_dir, repo_type, max_workers, *, max_retries=10, retry_delay=60):
    """Call ``snapshot_download``, retrying when an attempt raises.

    This is a best-effort helper: it never propagates an ``Exception`` raised
    by ``snapshot_download``. If every attempt fails, the failure is logged
    and the function returns ``None``, exactly as it does on success.

    Args:
        repo_id: Forwarded unchanged to ``snapshot_download``.
        revision: Forwarded unchanged to ``snapshot_download``.
        local_dir: Forwarded unchanged to ``snapshot_download``.
        repo_type: Forwarded unchanged to ``snapshot_download``.
        max_workers: Forwarded unchanged to ``snapshot_download``.
        max_retries: Maximum number of attempts (default 10).
        retry_delay: Seconds to wait between failed attempts (default 60).

    Returns:
        None.
    """
    # Local imports keep this block self-contained; the original also
    # imported ``time`` lazily inside the function.
    import logging
    import time

    logger = logging.getLogger(__name__)

    for attempt in range(1, max_retries + 1):
        try:
            snapshot_download(
                repo_id=repo_id,
                revision=revision,
                local_dir=local_dir,
                repo_type=repo_type,
                max_workers=max_workers,
            )
            return
        except Exception:
            # Deliberately broad: any failure is retried, but it is now
            # recorded instead of being silently discarded.
            logger.warning(
                "snapshot_download failed for %r (attempt %d/%d)",
                repo_id,
                attempt,
                max_retries,
                exc_info=True,
            )
            # Sleeping after the final attempt would only delay the caller.
            if attempt < max_retries:
                time.sleep(retry_delay)

    logger.error(
        "Giving up on snapshot_download for %r after %d attempts",
        repo_id,
        max_retries,
    )
    return


def get_dataset_url(row):
    """Build an HTML anchor for one benchmark row.

    Args:
        row: A mapping-like object indexable by ``'Benchmark'`` (used as the
            link text) and ``'Dataset Link'`` (used as the ``href``).

    Returns:
        A string containing an ``<a>`` element that opens in a new tab and is
        styled with a dotted underline. Values are interpolated as-is, with
        no HTML escaping.
    """
    label = row['Benchmark']
    link = row['Dataset Link']
    return (
        f'<a target="_blank" href="{link}" '
        'style="color: var(--link-text-color); text-decoration: underline;'
        'text-decoration-style: dotted;">'
        f'{label}</a>'
    )


def get_dataset_summary_table(file_path):
    """Load the dataset summary CSV and turn benchmark names into links.

    Reads ``file_path`` with ``pandas.read_csv``, replaces each value in the
    ``'Benchmark'`` column with the HTML anchor produced by
    ``get_dataset_url`` for that row, and keeps only the display columns.

    Args:
        file_path: Path (or any input accepted by ``pandas.read_csv``) of the
            CSV. It must provide the columns ``'Category'``, ``'Benchmark'``,
            ``'Dataset Link'``, ``'Data Split'``, ``'Data Size'`` and
            ``'Language'``.

    Returns:
        A DataFrame with the columns ``'Category'``, ``'Benchmark'``,
        ``'Data Split'``, ``'Data Size'`` and ``'Language'``, in that order.

    Raises:
        KeyError: If a required column is missing from the CSV.
    """
    df = pd.read_csv(file_path)

    # Build the column as an explicit per-row list. ``df.apply(..., axis=1)``
    # returns a DataFrame instead of a Series when the frame has no rows, and
    # assigning that to a single column fails, so a header-only CSV would
    # crash. The list is positional, so it lines up with the rows regardless
    # of index.
    df['Benchmark'] = [get_dataset_url(row) for _, row in df.iterrows()]

    return df[['Category', 'Benchmark', 'Data Split', 'Data Size', 'Language']]