File size: 2,842 Bytes
7d6d8a0
7fe41f5
 
ccb02b7
7fe41f5
7d6d8a0
 
c93e805
196074b
7d6d8a0
196074b
7d6d8a0
 
196074b
7d6d8a0
 
196074b
7d6d8a0
 
 
 
 
 
 
 
 
 
 
 
 
c93e805
7d6d8a0
ccb02b7
 
 
7d6d8a0
 
 
 
 
 
 
 
 
 
 
ccb02b7
7d6d8a0
 
 
 
 
5d9df6f
7d6d8a0
 
 
 
3e651c1
 
 
 
7d6d8a0
3e651c1
7d6d8a0
 
3e651c1
ccb02b7
7d6d8a0
 
 
 
ce63bf0
7d6d8a0
 
ccb02b7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import gradio as gr
import requests

from huggingface_hub import list_models, list_datasets, list_spaces
from typing import Union

# Helper function to get the total storage for models, datasets, or spaces
def get_total_storage(namespace, resource_type, oauth_token: Union[gr.OAuthToken, None]):
    """Sum the reported sizes of every repository of one type in a namespace.

    Args:
        namespace: Hub username or organization passed as ``author`` to the
            listing call.
        resource_type: One of ``"model"``, ``"dataset"`` or ``"space"``.
        oauth_token: OAuth token of the logged-in user, or ``None`` to query
            anonymously.

    Returns:
        A ``(total_size, repo_count)`` tuple. ``total_size`` is the sum of the
        ``size`` fields returned by the ``treesize`` endpoint (the caller
        treats it as bytes); ``repo_count`` is the number of repositories
        listed, including those whose size could not be fetched.

    Raises:
        ValueError: If ``resource_type`` is not one of the supported values.
    """
    # Validate first so an unsupported type fails with a clear message rather
    # than an UnboundLocalError further down.
    api_segments = {"model": "models", "dataset": "datasets", "space": "spaces"}
    if resource_type not in api_segments:
        raise ValueError(
            f"Unsupported resource_type {resource_type!r}; "
            f"expected one of {sorted(api_segments)}"
        )

    listers = {"model": list_models, "dataset": list_datasets, "space": list_spaces}
    token = oauth_token.token if oauth_token else None
    resources = list(listers[resource_type](author=namespace, token=token))
    url_base = f"https://huggingface.co/api/{api_segments[resource_type]}"

    # Send the same credentials used for listing; otherwise repositories that
    # are only visible with the token would be listed but never sized.
    headers = {"Authorization": f"Bearer {token}"} if token else None

    total_storage = 0
    for resource in resources:
        url = f"{url_base}/{resource.id}/treesize/main"
        try:
            # The timeout keeps one stalled connection from blocking the
            # whole report.
            response = requests.get(url, headers=headers, timeout=10)
        except requests.RequestException:
            # Best effort, consistent with non-200 responses: skip this repo.
            continue
        if response.status_code == 200:
            total_storage += response.json().get("size", 0)

    return total_storage, len(resources)

def get_report(namespace, oauth_token: Union[gr.OAuthToken, None]):
    """Build a Markdown storage and cost report for a Hub namespace.

    Args:
        namespace: Username or organization name typed by the user.
        oauth_token: OAuth token of the logged-in user, or ``None``.

    Returns:
        A Markdown string with the number of models, datasets and spaces,
        the combined storage in GB and TB, and the estimated cost. If the
        namespace is empty or only whitespace, a short prompt asking for a
        namespace is returned instead.
    """
    # An empty author would not filter the listing at all, so refuse it here
    # instead of trying to enumerate every repository.
    namespace = (namespace or "").strip()
    if not namespace:
        return "Please enter a namespace (username or org)."

    # Fetch storage and counts for models, datasets, and spaces
    model_storage, n_models = get_total_storage(namespace, "model", oauth_token)
    dataset_storage, n_datasets = get_total_storage(namespace, "dataset", oauth_token)
    space_storage, n_spaces = get_total_storage(namespace, "space", oauth_token)
    
    # Total storage
    total_storage = model_storage + dataset_storage + space_storage
    total_storage_gb = total_storage / (1024 ** 3)  # Convert from bytes to GB
    total_storage_tb = total_storage_gb / 1024       # Convert from GB to TB

    # Cost calculation (1 TB = 20 USD)
    estimated_cost = total_storage_tb * 20
    
    # Generate a report
    report = f"""
    ## Hugging Face Storage Report for {namespace}
    - **Number of Models**: {n_models}
    - **Number of Datasets**: {n_datasets}
    - **Number of Spaces**: {n_spaces}
    
    - **Total Storage**: {total_storage_gb:.2f} GB ({total_storage_tb:.2f} TB)
    - **Estimated Cost**: ${estimated_cost:.2f} USD (at 1 TB = $20USD)
    """
    
    return report

# Stylesheet handed to gr.Blocks below. It defines a `.main_ui_logged_out`
# rule; no component in this block references that class.
css = """
.main_ui_logged_out{opacity: 0.3; pointer-events: none}
"""

# Assemble the page: title, login button, namespace field and report area.
with gr.Blocks(css=css) as demo:
    gr.Markdown(value="# Hugging Face Storage Report")

    gr.LoginButton()
    namespace = gr.Textbox(label="Enter Namespace (username or org)")
    output = gr.Markdown()

    # Clicking the button runs get_report on the textbox value and renders
    # the returned Markdown in `output`.
    report_button = gr.Button(value="Generate Report")
    report_button.click(
        fn=get_report,
        inputs=namespace,
        outputs=output,
        concurrency_limit=10,
    )

# Start serving the app.
demo.launch()