Spaces:
Runtime error
Runtime error
File size: 7,672 Bytes
a2e4df5 efb1933 8e42039 a2e4df5 8e42039 302b56d 027a488 302b56d e036cb2 302b56d a2e4df5 302b56d a2e4df5 e4d88e1 8e42039 efb1933 a2e4df5 302b56d a2e4df5 ae3d70b a2e4df5 302b56d e4d88e1 302b56d 8e42039 302b56d ae3d70b a2e4df5 e4d88e1 a2e4df5 8e42039 a2e4df5 7821572 3576a82 a2e4df5 8e42039 9e3f52f a2e4df5 f570858 8055d0a 302b56d 8055d0a f570858 a2e4df5 efb1933 9e3f52f 615a420 efb1933 a2e4df5 efb1933 0c49ba1 efb1933 0c49ba1 efb1933 302b56d a2e4df5 302b56d a2e4df5 c140c30 a2e4df5 1f5d9f7 82101b9 1f5d9f7 82101b9 1f5d9f7 811f211 1f5d9f7 21882d6 8e42039 a2e4df5 82101b9 8c480b7 efb1933 a2e4df5 21882d6 73352b7 21882d6 a2e4df5 4544fe2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 |
import os
import gradio as gr
import pandas as pd
from huggingface_hub import InferenceClient
from threading import Timer
from tqdm import tqdm
import time
# Hugging Face access token read from the environment; None when the variable is not set.
HUGGINGFACE_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")
def _probe_endpoint(request):
    """Run one probe request and classify the outcome.

    Args:
        request: Zero-argument callable that performs a single inference call.

    Returns:
        A ``(available, needs_pro)`` tuple. ``available`` is True when the
        call returned without raising; ``needs_pro`` is True when the error
        text says the model requires a Pro subscription.
    """
    try:
        request()
    except Exception as e:
        # Deliberately broad: this is a best-effort probe, and any failure
        # simply means the endpoint is not usable right now.
        message = str(e)
        if "Rate limit reached" in message:
            print("Rate Limited, waiting 1 hour...")
            time.sleep(60*60)
        return False, "Model requires a Pro subscription" in message
    return True, False


def loop_query_data():
    """Probe every known model and record which inference APIs respond.

    Lists the deployed models, merges them into the module-level
    ``all_models`` list, sends a one-token text-generation request and a
    one-token chat-completion request to each model, and writes the result
    table to ``data.csv`` in the current working directory.

    Returns:
        dict of column name -> list of values, with the columns "Model",
        "API", "Text Completion", "Chat Completion" and "Vision".
    """
    global all_models
    client = InferenceClient(token=HUGGINGFACE_TOKEN)
    models_dict = client.list_deployed_models("text-generation-inference")
    # .get(): a task key that is absent from the listing should mean
    # "no models for that task", not a KeyError.
    models = models_dict.get('text-generation', []) + models_dict.get('text2text-generation', [])
    models_vision = models_dict.get('image-text-to-text', [])
    models_others = client.list_deployed_models(frameworks="all").get("text-generation", [])
    models_conclusion = {
        "Model": [],
        "API": [],
        "Text Completion": [],
        "Chat Completion": [],
        "Vision": []
    }
    all_models = list(set(all_models + models + models_vision + models_others))
    vision_models = set(models_vision)
    for m in tqdm(all_models):
        vision_available = m in vision_models
        # Both probes use the same authenticated client so that they are
        # subject to the same access rules.
        model_client = InferenceClient(m, timeout=10, token=HUGGINGFACE_TOKEN)
        text_available, text_needs_pro = _probe_endpoint(
            lambda: model_client.text_generation("Hi.", max_new_tokens=1)
        )
        chat_available, chat_needs_pro = _probe_endpoint(
            lambda: model_client.chat_completion(messages=[{'role': 'user', 'content': 'Hi.'}], max_tokens=1)
        )
        pro_sub = text_needs_pro or chat_needs_pro
        responding = chat_available or text_available

        if responding:
            api_status = "Free"
        elif pro_sub:
            api_status = "Pro Subscription"
        else:
            api_status = "Not Responding"
        # Per-endpoint marks are replaced by "---" when the model is gated
        # behind Pro or did not answer on either endpoint.
        hide_marks = pro_sub or not responding

        # NOTE(review): the "available" and "unavailable" glyph literals below
        # are the same character in this copy of the file, which looks like
        # encoding damage to two distinct marks - confirm against the original.
        models_conclusion["Model"].append(m)
        models_conclusion["API"].append(api_status)
        models_conclusion["Chat Completion"].append("---" if hide_marks else ("β" if chat_available else "β"))
        models_conclusion["Text Completion"].append("---" if hide_marks else ("β" if text_available else "β"))
        models_conclusion["Vision"].append("β" if vision_available else "β")
    pd.DataFrame(models_conclusion).to_csv(os.path.join(os.getcwd(), "data.csv"), index=False)
    return models_conclusion
def get_available_free(use_cache=False):
    """Return the model status table as a dict of column lists.

    Args:
        use_cache: When True and ``data.csv`` exists in the current working
            directory, load the table from that file instead of re-probing.

    Returns:
        dict of column name -> list of values. The cached CSV is used when
        requested and present; otherwise the result of a fresh
        ``loop_query_data()`` run is returned, so the function never
        returns None.
    """
    cache_path = os.path.join(os.getcwd(), "data.csv")
    # Check and read the same path so the two cannot disagree.
    if use_cache and os.path.exists(cache_path):
        return pd.read_csv(cache_path).to_dict(orient='list')
    # No usable cache (or caching disabled): query the APIs now.
    return loop_query_data()
def update_data(use_cache = False):
    """Load the status table and return it as a sorted DataFrame.

    The two completion columns are temporarily replaced by numeric ranks so
    that sorting follows the order given in ``status_mapping``; the ranks are
    mapped back to their display strings before the frame is returned.
    """
    frame = pd.DataFrame(get_available_free(use_cache))
    status_mapping = {"β": 0, "β": 1, "---": 2}
    rank_to_status = {rank: status for status, rank in status_mapping.items()}
    status_columns = ('Text Completion', 'Chat Completion')

    # Display strings -> sortable ranks.
    for column in status_columns:
        frame[column] = frame[column].map(status_mapping)

    frame = frame.sort_values(by=['API', *status_columns, 'Vision'])

    # Sortable ranks -> display strings.
    for column in status_columns:
        frame[column] = frame[column].map(rank_to_status)
    return frame
def display_table(search_query="", filters=None, use_cache=False):
    """Build the styled status table, narrowed by search text and filters.

    Args:
        search_query: Text to look for in the "Model" column. Matching is a
            case-insensitive literal substring test; an empty string or None
            disables the search.
        filters: Optional list of filter labels. "Free", "Pro Subscription"
            and "Not Responding" restrict the "API" column to the selected
            values; "Text Completion", "Chat Completion" and "Vision" keep
            only rows marked available in that column.
        use_cache: Forwarded to ``update_data``.

    Returns:
        A pandas Styler over the filtered rows, colored by ``apply_row_styles``.
    """
    df = update_data(use_cache)
    # None must mean "no search", not the literal text "None".
    search_query = "" if search_query is None else str(search_query)
    filters = filters or []

    filtered_df = df
    if search_query:
        # regex=False: the text is typed by a user, so characters such as
        # "(" or "[" must be matched literally instead of raising re.error.
        # na=False: rows without a model name simply do not match.
        matches = filtered_df["Model"].str.contains(search_query, case=False, regex=False, na=False)
        filtered_df = filtered_df[matches]

    api_filters = [f for f in filters if f in ("Free", "Pro Subscription", "Not Responding")]
    if api_filters:
        filtered_df = filtered_df[filtered_df["API"].isin(api_filters)]
    if "Text Completion" in filters:
        filtered_df = filtered_df[filtered_df["Text Completion"] == "β"]
    if "Chat Completion" in filters:
        filtered_df = filtered_df[filtered_df["Chat Completion"] == "β"]
    if "Vision" in filters:
        filtered_df = filtered_df[filtered_df["Vision"] == "β"]

    styled_df = filtered_df.style.apply(
        apply_row_styles,
        axis=1,
        subset=["Model", "API", "Text Completion", "Chat Completion", "Vision"],
    )
    return styled_df
def apply_row_styles(row):
    """Return the CSS style for each displayed cell of one table row.

    The row's "API" value is handed to ``color_status`` together with every
    cell, in the column order Model, API, Text Completion, Chat Completion,
    Vision.
    """
    api_label = row["API"]
    styled_columns = ("Model", "API", "Text Completion", "Chat Completion", "Vision")
    return [color_status(api_label, row[column]) for column in styled_columns]
def color_status(api_value, cell_value):
    """Pick the background-color CSS for a single table cell.

    A "---" placeholder cell takes the color of its row's API status. Any
    other cell is colored by its own content: an API status label or an
    availability glyph. Unrecognized values get an empty style string.
    """
    api_colors = {
        "Free": 'background-color: green',
        "Pro Subscription": 'background-color: blue',
        "Not Responding": 'background-color: red',
    }

    # Placeholder cells are colored purely from the row's API status.
    if cell_value == "---":
        return api_colors.get(api_value, '')

    # Cells that themselves hold an API status label.
    label_style = api_colors.get(cell_value)
    if label_style is not None:
        return label_style

    # Availability glyphs, checked in the original order.
    if cell_value == "β":
        return 'background-color: green'
    if cell_value == "β":
        return 'background-color: red'
    return ''
def search_models(query, filters=None, use_cache=True):
    """Return the styled status table for a search query and filter list.

    Thin wrapper around ``display_table`` that defaults to cached data.

    Args:
        query: Search text forwarded to ``display_table``.
        filters: Optional list of filter labels; None means no filters.
        use_cache: Whether cached data may be used (default True).
    """
    # A fresh list per call replaces the shared mutable default argument.
    active_filters = [] if filters is None else filters
    return display_table(query, active_filters, use_cache)
# Markdown text shown on the page; it is passed to gr.Markdown when the UI is built.
description = """
This is a space that retrieves the status of supported HF LLM Serverless Inference APIs.
*Updates every 2 hours!*
If you are a student or you just want to quickly see what models are available to experiment for free, you are most likely highly interested on the free API huggingface provides... but like me, you struggle to find what models are available or not!
This is why I made this space that every 2 hours checks and updates the status of the list of LLMs that are cached and, in theory, supported by retrieving the list in `InferenceClient().list_deployed_models()`.
*It may not have all of the available ones... for now... it's WIP*
So all you need is to plug:
```py
from huggingface_hub import InferenceClient
inf = InferenceClient(model = "MODEL", token = "TOKEN")
response = inf.text_generation("And play !!")
print(response)
```
"""
# Passed as the use_cache argument of the first scheduled refresh, so the
# initial run may load cached data instead of re-probing.
first_run = True
# Accumulated list of model ids; loop_query_data() merges each new listing
# into it through a `global` statement.
all_models = []
# Build the page: title, description, search box, filter checkboxes and the
# status table, then start the periodic background refresh.
with gr.Blocks() as demo:
    gr.Markdown("## HF Serverless LLM Inference API Status")
    gr.Markdown(description)
    search_box = gr.Textbox(label="Search for a model", placeholder="Type model name here...")
    filter_box = gr.CheckboxGroup(choices=["Free", "Pro Subscription", "Not Responding", "Text Completion", "Chat Completion", "Vision"], label="Filters")
    table = gr.Dataframe(value=display_table(use_cache=True), headers="keys")

    def update_filters(query, filters):
        """Re-render the table from cached data for the current search and filters."""
        return search_models(query, filters, use_cache=True)

    search_box.change(fn=update_filters, inputs=[search_box, filter_box], outputs=table)
    filter_box.change(fn=update_filters, inputs=[search_box, filter_box], outputs=table)

    def update_every_two_hours(first_run):
        """Refresh the status data, then schedule the next refresh in 2 hours.

        Args:
            first_run: Passed as ``use_cache``; True lets the first run reuse
                an existing cache file, later runs pass False to force
                fresh probing.
        """
        try:
            # Only the data refresh matters here; building a styled table
            # from the static initial textbox value would be thrown away.
            get_available_free(use_cache=first_run)
        except Exception as e:
            # Runs in a timer thread: report the failure instead of letting
            # it silently end the refresh cycle.
            print("Status refresh failed:", e)
        finally:
            # Reschedule even after a failure so that one bad run does not
            # stop all future updates.
            Timer(7200, update_every_two_hours, args=(False,)).start()

    Timer(0, update_every_two_hours, args=(first_run,)).start()

demo.launch()