File size: 7,672 Bytes
a2e4df5
 
 
 
 
efb1933
8e42039
a2e4df5
8e42039
 
 
 
302b56d
027a488
302b56d
e036cb2
302b56d
a2e4df5
 
 
 
302b56d
 
a2e4df5
e4d88e1
8e42039
efb1933
a2e4df5
 
302b56d
 
 
a2e4df5
 
ae3d70b
a2e4df5
302b56d
e4d88e1
302b56d
 
 
8e42039
 
302b56d
ae3d70b
a2e4df5
 
e4d88e1
a2e4df5
 
 
8e42039
 
a2e4df5
 
 
 
7821572
3576a82
a2e4df5
 
8e42039
 
 
 
 
 
 
 
9e3f52f
 
a2e4df5
f570858
8055d0a
 
 
 
 
302b56d
8055d0a
 
 
f570858
a2e4df5
 
efb1933
9e3f52f
615a420
efb1933
a2e4df5
 
 
 
efb1933
 
0c49ba1
 
 
efb1933
 
 
 
0c49ba1
 
efb1933
302b56d
a2e4df5
 
 
 
 
 
 
 
302b56d
 
a2e4df5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c140c30
 
a2e4df5
1f5d9f7
82101b9
1f5d9f7
 
 
82101b9
 
1f5d9f7
811f211
1f5d9f7
 
 
 
 
 
 
21882d6
8e42039
a2e4df5
 
 
 
82101b9
8c480b7
efb1933
 
 
 
 
 
a2e4df5
21882d6
73352b7
21882d6
 
 
a2e4df5
4544fe2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
import os
import gradio as gr
import pandas as pd
from huggingface_hub import InferenceClient
from threading import Timer
from tqdm import tqdm
import time

# Hugging Face API token read from the environment; None when the variable is unset.
HUGGINGFACE_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")

def _probe_endpoint(call):
    """Run a single inference probe and classify its outcome.

    Args:
        call: Zero-argument callable that performs one inference request.

    Returns:
        An ``(available, pro_required)`` tuple. ``available`` is True when
        ``call`` returned without raising. ``pro_required`` is True when the
        error text mentions a Pro subscription.
    """
    try:
        call()
    except Exception as e:  # best-effort probe: any failure means "not available"
        message = str(e)
        if "Rate limit reached" in message:
            # Back off before touching the next endpoint. The current probe is
            # not retried, so it is still reported as unavailable.
            print("Rate Limited, waiting 1 hour...")
            time.sleep(60*60)
        return False, "Model requires a Pro subscription" in message
    return True, False


def loop_query_data():
    """Probe every known model and record which inference APIs respond.

    The deployed-model lists are fetched through ``InferenceClient``, merged
    into the module-level ``all_models`` list, and each model is probed once
    with ``text_generation`` and once with ``chat_completion``.

    Side effects:
        Rebinds the global ``all_models`` and writes ``data.csv`` into the
        current working directory.

    Returns:
        dict: Column name -> list of values, one entry per probed model, with
        the keys "Model", "API", "Text Completion", "Chat Completion" and
        "Vision".
    """
    global all_models
    client = InferenceClient(token=HUGGINGFACE_TOKEN)
    models_dict = client.list_deployed_models("text-generation-inference")
    # .get() so a task missing from the response does not abort the whole refresh.
    models = models_dict.get('text-generation', []) + models_dict.get('text2text-generation', [])
    models_vision = models_dict.get('image-text-to-text', [])
    models_others = client.list_deployed_models(frameworks="all").get("text-generation", [])

    models_conclusion = {
        "Model": [],
        "API": [],
        "Text Completion": [],
        "Chat Completion": [],
        "Vision": []
    }

    vision_models = set(models_vision)  # O(1) membership test inside the loop
    # sorted() keeps the probing order deterministic between runs.
    all_models = sorted(set(all_models + models + models_vision + models_others))

    # NOTE(review): the status markers below look like mis-decoded glyphs; they
    # are kept byte-for-byte because the rest of the file compares against them.
    for m in tqdm(all_models):
        text_available, text_pro = _probe_endpoint(
            lambda: InferenceClient(m, timeout=10, token=HUGGINGFACE_TOKEN).text_generation("Hi.", max_new_tokens=1)
        )
        # The chat probe uses the same token as the text probe so that both
        # endpoints are tested under the same credentials.
        chat_available, chat_pro = _probe_endpoint(
            lambda: InferenceClient(m, timeout=10, token=HUGGINGFACE_TOKEN).chat_completion(
                messages=[{'role': 'user', 'content': 'Hi.'}], max_tokens=1
            )
        )

        pro_sub = text_pro or chat_pro
        responding = chat_available or text_available
        if responding:
            api_status = "Free"
        elif pro_sub:
            api_status = "Pro Subscription"
        else:
            api_status = "Not Responding"
        # Per-endpoint detail is hidden when a Pro error was seen or nothing answered.
        hide_detail = pro_sub or not responding

        models_conclusion["Model"].append(m)
        models_conclusion["API"].append(api_status)
        models_conclusion["Chat Completion"].append("---" if hide_detail else ("βœ“" if chat_available else "βŒ€"))
        models_conclusion["Text Completion"].append("---" if hide_detail else ("βœ“" if text_available else "βŒ€"))
        models_conclusion["Vision"].append("βœ“" if m in vision_models else "βŒ€")

    pd.DataFrame(models_conclusion).to_csv(os.path.join(os.getcwd(), "data.csv"), index=False)
    return models_conclusion

def get_available_free(use_cache=False):
    """Return the model status table as a dict of column lists.

    Args:
        use_cache: When True, load ``data.csv`` from the current working
            directory if it exists instead of querying the APIs.

    Returns:
        dict: Column name -> list of values, either read from the cached CSV
        or produced by ``loop_query_data()``.
    """
    cache_path = os.path.join(os.getcwd(), "data.csv")
    if use_cache and os.path.exists(cache_path):
        return pd.read_csv(cache_path).to_dict(orient='list')
    # Either caching is disabled or no cache file exists yet. Query live rather
    # than returning None, which would leave callers with an empty frame.
    return loop_query_data()

def update_data(use_cache = False):
    """Build the status DataFrame, ordered by API status and endpoint availability.

    Args:
        use_cache: Forwarded to ``get_available_free``.

    Returns:
        pandas.DataFrame: Rows sorted by "API", "Text Completion",
        "Chat Completion" and "Vision". The two completion columns are ranked
        through a marker -> number mapping for the sort and mapped back after.
    """
    frame = pd.DataFrame(get_available_free(use_cache))

    rank_of = {"βœ“": 0, "βŒ€": 1, "---": 2}
    marker_of = {rank: marker for marker, rank in rank_of.items()}
    ranked_columns = ('Text Completion', 'Chat Completion')

    # Swap markers for their numeric rank so the sort follows rank_of order.
    for column in ranked_columns:
        frame[column] = frame[column].map(rank_of)

    frame = frame.sort_values(by=['API', 'Text Completion', 'Chat Completion', 'Vision'])

    # Restore the original markers for display.
    for column in ranked_columns:
        frame[column] = frame[column].map(marker_of)

    return frame

def display_table(search_query="", filters=None, use_cache=False):
    """Return the styled status table, narrowed by a search string and filters.

    Args:
        search_query: Text matched case-insensitively, as a literal substring,
            against the "Model" column. Empty or None disables the search.
        filters: Optional iterable of filter labels. "Free",
            "Pro Subscription" and "Not Responding" restrict the "API" column;
            "Text Completion", "Chat Completion" and "Vision" keep only rows
            whose respective column holds the available marker.
        use_cache: Forwarded to ``update_data``.

    Returns:
        The pandas Styler produced by applying ``apply_row_styles`` row-wise
        to the filtered frame.
    """
    df = update_data(use_cache)
    search_query = "" if search_query is None else str(search_query)
    filters = filters or []

    filtered_df = df
    if search_query:
        # regex=False: user input is a plain model name, so characters such as
        # "(" or "[" must not be parsed as a pattern. na=False keeps the mask
        # boolean even if a model name is missing.
        name_matches = df["Model"].str.contains(search_query, case=False, regex=False, na=False)
        filtered_df = df[name_matches]

    if filters:
        api_filters = [f for f in filters if f in ["Free", "Pro Subscription", "Not Responding"]]
        if api_filters:
            filtered_df = filtered_df[filtered_df["API"].isin(api_filters)]
        # Each capability filter keeps only the rows marked as available.
        for capability in ("Text Completion", "Chat Completion", "Vision"):
            if capability in filters:
                filtered_df = filtered_df[filtered_df[capability] == "βœ“"]

    styled_df = filtered_df.style.apply(apply_row_styles, axis=1, subset=["Model", "API", "Text Completion", "Chat Completion", "Vision"])
    return styled_df

def apply_row_styles(row):
    """Return the CSS style for each displayed cell of one table row.

    Args:
        row: Row object indexable by the column names "Model", "API",
            "Text Completion", "Chat Completion" and "Vision".

    Returns:
        list: Five CSS strings from ``color_status``, in that column order.
    """
    styled_columns = ("Model", "API", "Text Completion", "Chat Completion", "Vision")
    api_status = row["API"]
    return [color_status(api_status, row[column]) for column in styled_columns]

def color_status(api_value, cell_value):
    """Pick the background-colour CSS for one table cell.

    Args:
        api_value: Value of the row's "API" column.
        cell_value: Value of the cell being styled.

    Returns:
        str: A ``background-color`` declaration, or an empty string when
        neither value matches a known status.
    """
    api_colors = {
        "Free": 'background-color: green',
        "Pro Subscription": 'background-color: blue',
        "Not Responding": 'background-color: red',
    }

    # A "---" placeholder cell takes the colour of the row's API status.
    if cell_value == "---":
        return api_colors.get(api_value, '')

    # Any other cell is coloured by its own content.
    cell_colors = dict(api_colors)
    cell_colors["βœ“"] = 'background-color: green'
    cell_colors["βŒ€"] = 'background-color: red'
    return cell_colors.get(cell_value, '')

def search_models(query, filters=None, use_cache=True):
    """Return the styled status table for a search query and filter selection.

    Thin wrapper around ``display_table`` whose ``use_cache`` defaults to True.

    Args:
        query: Search text forwarded to ``display_table``.
        filters: Optional list of filter labels; None means no filters.
        use_cache: Forwarded to ``display_table``.

    Returns:
        Whatever ``display_table`` returns for these arguments.
    """
    # None replaces the former mutable ``[]`` default; callers still get an
    # empty list passed through.
    return display_table(query, [] if filters is None else filters, use_cache)

# Markdown text rendered at the top of the page via gr.Markdown(description).
description = """
This is a space that retrieves the status of supported HF LLM Serverless Inference APIs.
*Updates every 2 hours!*

If you are a student or you just want to quickly see what models are available to experiment for free, you are most likely highly interested on the free API huggingface provides... but like me, you struggle to find what models are available or not!
This is why I made this space that every 2 hours checks and updates the status of the list of LLMs that are cached and, in theory, supported by retrieving the list in `InferenceClient().list_deployed_models()`.
*It may not have all of the available ones... for now... it's WIP*

So all you need is to plug:
```py
from huggingface_hub import InferenceClient
inf = InferenceClient(model = "MODEL", token = "TOKEN")
response = inf.text_generation("And play !!")
print(response)
```
"""
# Handed to the first scheduled refresh, where it is used as the use_cache flag.
first_run = True
# Model ids seen so far; loop_query_data() merges newly listed models into it.
all_models = []
# Build the UI: a search box and a filter group that both re-render the table.
with gr.Blocks() as demo:
    gr.Markdown("## HF Serverless LLM Inference API Status")
    gr.Markdown(description)
    search_box = gr.Textbox(label="Search for a model", placeholder="Type model name here...")
    filter_box = gr.CheckboxGroup(choices=["Free", "Pro Subscription", "Not Responding", "Text Completion", "Chat Completion", "Vision"], label="Filters")
    table = gr.Dataframe(value=display_table(use_cache=True), headers="keys")

    def update_filters(query, filters):
        """Re-render the table from cached data for the current query and filters."""
        return search_models(query, filters, use_cache=True)

    search_box.change(fn=update_filters, inputs=[search_box, filter_box], outputs=table)
    filter_box.change(fn=update_filters, inputs=[search_box, filter_box], outputs=table)

    def update_every_two_hours(first_run):
        """Refresh the status data, then schedule the next refresh in 7200 seconds.

        Args:
            first_run: Used as the ``use_cache`` flag. The first call reuses
                the cache; later calls pass False to force a live query.
        """
        try:
            # The return value is discarded; the call is made for its refresh
            # side effect. An empty query avoids filtering on the component's
            # build-time value, which is not the live user input.
            search_models("", use_cache=first_run)
        except Exception as e:
            # A failed refresh must not end the schedule; log it and carry on.
            print(f"Status refresh failed: {e}")
        finally:
            next_refresh = Timer(7200, update_every_two_hours, args=(False,))
            next_refresh.daemon = True  # do not keep the process alive at shutdown
            next_refresh.start()

    initial_refresh = Timer(0, update_every_two_hours, args=(first_run,))
    initial_refresh.daemon = True
    initial_refresh.start()

demo.launch()