import gradio as gr
import pandas as pd
from hub_utils import check_for_discussion, report_results
from model_utils import calculate_memory, get_model
from huggingface_hub.utils import HfHubHTTPError
from hub_model_stats_utils import get_model_type_downloads
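
# Note: hub_utils, model_utils, and hub_model_stats_utils are presumably local modules that
# live alongside this app in the same Space/repo rather than pip-installed packages.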

# Store these as globals because Gradio doesn't give us a way to pass them into the button callbacks.
MODEL = None
TASK_INP = None

def get_mem_results(model_name: str, library: str, options: list, access_token: str):
    global MODEL
    MODEL = get_model(model_name, library, access_token)
    try:
        has_discussion = check_for_discussion(model_name)
    except HfHubHTTPError:
        has_discussion = True
    title = f"## Memory usage for '{model_name}'"
    data = calculate_memory(MODEL, options)
    return [title, gr.update(visible=True, value=pd.DataFrame(data)), gr.update(visible=not has_discussion)]
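
# `calculate_memory` and `get_model` come from the local `model_utils` module (not shown here).
# Judging from the DataFrame headers defined further down, each row of `data` is assumed to look
# roughly like {"dtype": ..., "Largest Layer": ..., "Total Size": ..., "Training using Adam": ...};
# that shape is an assumption inferred from this file, not a documented contract of model_utils.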


with gr.Blocks() as demo:
    gr.Markdown("""<h1>Positron Model Universe Explorer</h1>""")
    with gr.Tab("Model Size"):
        gr.Markdown(
            """<h1>Model Memory Calculator</h1>

            This tool will help you calculate how much vRAM is needed to train and perform big model inference
            on a model hosted on the 🤗 Hugging Face Hub. The minimum recommended vRAM needed for a model
            is denoted as the size of the "largest layer", and training of a model is roughly 4x its size (for Adam)."""
        )
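        # Rough worked example of the rule of thumb above (illustrative only): a 13B-parameter
        # model in float16 uses about 13e9 params * 2 bytes ≈ 26 GB for its weights alone, so
        # training it with Adam would need roughly 4x that, i.e. ≈ 104 GB of vRAM.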

        out_text = gr.Markdown()
        mem_out = gr.DataFrame(
            headers=["dtype", "Largest Layer", "Total Size", "Training using Adam"],
            interactive=False,
            visible=True,
        )
        with gr.Row():
            model_name_inp = gr.Textbox(label="Model Name or URL", value="TheBloke/Nous-Hermes-13B-GPTQ")
        with gr.Row():
            library = gr.Radio(["auto", "transformers", "timm"], label="Library", value="auto")
            options = gr.CheckboxGroup(
                ["float32", "float16/bfloat16", "int8", "int4"],
                value="float32",
                label="Model Precision",
            )
            access_token = gr.Textbox(label="API Token", placeholder="Optional (for gated models)")
        with gr.Row():
            mem_btn = gr.Button("Calculate Memory Usage")
            post_to_hub = gr.Button(
                value="Report results in this model repo's discussions!\n(Will open in a new tab)", visible=False
            )

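        # The three values returned by get_mem_results map positionally onto the outputs list
        # below ([out_text, mem_out, post_to_hub]): the Markdown title, the populated results
        # DataFrame, and the visibility toggle for the "report results" button.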
        mem_btn.click(
            get_mem_results,
            inputs=[model_name_inp, library, options, access_token],
            outputs=[out_text, mem_out, post_to_hub],
        )

    with gr.Tab("Model Type"):
        gr.Markdown(
            """<h1>Models by Model Task</h1>"""
        )

        with gr.Row():
            task_inp = gr.Dropdown(
                choices=[
                    "text-generation",
                    "question-answering",
                    "text-classification",
                    "unconditional-image-generation",
                ],
                value="text-generation",
                interactive=True,
                filterable=True,
                label="Model Task",
                show_label=True,
            )
            stat_inp = gr.Radio(
                choices=["downloads", "likes"],
                value="downloads",
                label="Sort by",
                show_label=True,
                interactive=True,
            )

        with gr.Row():
            stats_btn = gr.Button("Pull Stats from HF API")

        # Define the output DataFrame before wiring the click handler so `stats_out`
        # exists when it is referenced below.
        stats_out = gr.Dataframe(row_count=10)
        # gr.Dataframe(get_model_type_likes(), row_count=10)

        stats_btn.click(
            get_model_type_downloads,
            inputs=[task_inp],
            outputs=[stats_out],
        )
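
# A minimal sketch of what `get_model_type_downloads` (imported above from the local
# `hub_model_stats_utils` module) might do, assuming it queries the Hub via
# `huggingface_hub.HfApi.list_models`. The name and body below are illustrative
# assumptions for readers of this file, not the Space's actual implementation,
# and the app never calls this sketch.
def _sketch_top_models_for_task(task: str, limit: int = 10) -> pd.DataFrame:
    from huggingface_hub import HfApi

    # `filter=task` matches models tagged with the given pipeline task; results are
    # sorted by download count in descending order and capped at `limit` entries.
    models = HfApi().list_models(filter=task, sort="downloads", direction=-1, limit=limit)
    return pd.DataFrame(
        [{"model_id": m.id, "downloads": m.downloads, "likes": m.likes} for m in models]
    )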

demo.launch(share=True)