|
import gradio as gr |
|
import json |
|
import requests |
|
import pandas as pd |
|
|
|
def update_task_options(framework):
    """Refresh the task dropdown so it only lists tasks valid for *framework*."""
    # Tasks supported per framework family.
    tasks_by_framework = {
        "Custom": ["Custom"],
        "Diffusers": ["text-to-image"],
        "Transformers": [
            "text-classification",
            "zero-shot-classification",
            "token-classification",
            "question-answering",
            "fill-mask",
            "summarization",
            "translation",
            "text2text-generation",
            "text-generation",
            "feature-extraction",
            "image-classification",
            "automatic-speech-recognition",
            "audio-classification",
            "object-detection",
            "image-segmentation",
            "table-question-answering",
        ],
    }
    tasks = tasks_by_framework[framework]
    # Preselect the first task when the framework has any.
    return gr.Dropdown.update(choices=tasks, value=tasks[0] if tasks else None)
|
|
|
def update_regions(provider):
    """Query the Endpoints API for *provider*'s regions and refresh the region dropdown."""
    url = f"https://api.endpoints.huggingface.cloud/provider/{provider}/region"
    response = requests.get(url, headers={"Content-Type": "application/json"})

    # Expose each usable region as "region/label" for display.
    available_regions = [
        f"{item['region']}/{item['label']}"
        for item in response.json()['items']
        if item['status'] == 'available'
    ]

    return gr.Dropdown.update(
        choices=available_regions,
        value=available_regions[0] if available_regions else None,
    )
|
|
|
def update_compute_options(provider, region):
    """Fetch compute instances available in *provider*/*region* and refresh
    the compute dropdown with human-readable option strings.

    Each option looks like "CPU [medium] · 2vCPU 4GB · Intel Xeon · c6i"
    (submit() later parses this format back apart).
    """
    # The incoming region value is "region/label" (see update_regions).
    region = region.split("/")[0]
    available_compute_choices = []

    headers = {
        "Content-Type": "application/json",
    }
    endpoint_url = f"https://api.endpoints.huggingface.cloud/provider/{provider}/region/{region}/compute"
    response = requests.get(endpoint_url, headers=headers)

    for compute in response.json()['items']:
        if compute['status'] == 'available':
            accelerator = compute['accelerator']
            num_accelerators = compute['numAccelerators']
            memory_gb = compute['memoryGb'].replace("Gi", "GB")
            architecture = compute['architecture']
            instance_type = compute['instanceType']

            # CPU instances show core count + memory; GPU instances show card count.
            # (Renamed from `type`, which shadowed the builtin.)
            detail = (
                f"{num_accelerators}vCPU {memory_gb} · {architecture}"
                if accelerator == "cpu"
                else f"{num_accelerators}x {architecture}"
            )

            available_compute_choices.append(
                f"{accelerator.upper()} [{compute['instanceSize']}] · {detail} · {instance_type}"
            )

    return gr.Dropdown.update(
        choices=available_compute_choices,
        value=available_compute_choices[0] if available_compute_choices else None
    )
|
|
|
def submit(
    hf_token_input,
    endpoint_name_input,
    provider_selector,
    region_selector,
    repository_selector,
    revision_selector,
    task_selector,
    framework_selector,
    compute_selector,
    min_node_selector,
    max_node_selector,
    security_selector
):
    """Create a new Inference Endpoint from the deploy form.

    Parses the compute string built by update_compute_options()
    (e.g. "CPU [medium] · 2vCPU 4GB · Intel Xeon · c6i") back into
    accelerator / instance size / instance type, builds the API payload,
    POSTs it, and returns a status message for the UI textbox.
    """
    compute_resources = compute_selector.split("·")
    # First segment looks like "CPU [medium] "; the accelerator tag is its
    # first three characters ("CPU" or "GPU").
    accelerator = compute_resources[0][:3].strip()

    # Instance size sits between the square brackets of the first segment.
    # (Replaces the old index("[") - 1 / strip() / [1:] dance, same result.)
    head = compute_resources[0]
    instance_size = head[head.index("[") + 1 : head.index("]")]

    # Last segment is the raw instanceType identifier. (Renamed from `type`,
    # which shadowed the builtin.)
    instance_type = compute_resources[-1].strip()

    payload = {
        "accountId": repository_selector.split("/")[0],
        "compute": {
            "accelerator": accelerator.lower(),
            "instanceSize": instance_size,
            "instanceType": instance_type,
            "scaling": {
                "maxReplica": int(max_node_selector),
                "minReplica": int(min_node_selector)
            }
        },
        "model": {
            # NOTE(review): framework is always sent as "custom" even though
            # the UI collects framework_selector — confirm this is intentional.
            "framework": "custom",
            "image": {
                "huggingface": {}
            },
            "repository": repository_selector.lower(),
            "revision": revision_selector,
            "task": task_selector.lower()
        },
        "name": endpoint_name_input.strip(),
        "provider": {
            "region": region_selector.split("/")[0].lower(),
            "vendor": provider_selector.lower()
        },
        "type": security_selector.lower()
    }

    payload = json.dumps(payload)

    headers = {
        "Authorization": f"Bearer {hf_token_input.strip()}",
        "Content-Type": "application/json",
    }
    endpoint_url = "https://api.endpoints.huggingface.cloud/endpoint"
    print(f"Endpoint: {endpoint_url}")

    response = requests.post(endpoint_url, headers=headers, data=payload)
    if response.status_code == 400:
        return f"{response.text}. Malformed data in {payload}"
    elif response.status_code == 401:
        return "Invalid token"
    elif response.status_code == 409:
        # Name collision: an endpoint with this name already exists.
        return f"Error: {response.text}"
    elif response.status_code == 202:
        return f"Endpoint {endpoint_name_input} created successfully on {provider_selector.lower()} using {repository_selector.lower()}@{revision_selector}. \n Please check out the progress at https://ui.endpoints.huggingface.co/endpoints."
    else:
        return f"Something went wrong! - StatusCode:{response.status_code} - Error: {response.text}"
|
|
|
def delete_endpoint(
    hf_token_input,
    endpoint_name_input
):
    """Issue a DELETE for the named endpoint and report the outcome as text."""
    headers = {
        "Authorization": f"Bearer {hf_token_input.strip()}",
        "Content-Type": "application/json",
    }
    response = requests.delete(
        f"https://api.endpoints.huggingface.cloud/endpoint/{endpoint_name_input}",
        headers=headers,
    )

    status = response.status_code
    if status == 401:
        return "Invalid token"
    if status == 404:
        return f"Error: {response.text}"
    if status in (202, 200):
        return f"Endpoint {endpoint_name_input} deleted successfully."
    return f"Something went wrong! - StatusCode:{status} - Error: {response.text}"
|
|
|
def get_all_endpoints(
    hf_token_input,
    method,
):
    """List every endpoint on the account.

    When method == "info", returns a DataFrame update for the info table;
    for any other method ("update"/"delete"/"logs"), returns a Dropdown
    update of endpoint names.

    Fix: DataFrame.append() was removed in pandas 2.0 — rows are now
    collected in a list and handed to the DataFrame constructor once.
    Also guards against an account with zero endpoints (the old code
    indexed items[0] unconditionally).
    """
    response = requests.get(
        "https://api.endpoints.huggingface.cloud/endpoint",
        headers={
            "Authorization": f"Bearer {hf_token_input.strip()}",
            "Content-Type": "application/json",
        })

    if response.status_code == 401:
        if method == "info":
            return gr.DataFrame.update(
                value=pd.DataFrame({"Error": ["Invalid Token -OR- No Endpoint Found !"]}),
            )
        else:
            return gr.Dropdown.update(
                value="Invalid token or No endpoints found!",
            )
    elif response.status_code == 200:
        endpoints_json = response.json()
        if method == "info":
            # One flat row per endpoint, in the table's column order.
            rows = [
                {
                    "name": endpoint["name"],
                    "model": endpoint["model"]["repository"] + "@" + endpoint["model"]["revision"],
                    "provider": endpoint["provider"]["vendor"] + "/" + endpoint["provider"]["region"],
                    "compute": endpoint["compute"]["instanceType"] + "·" + endpoint["compute"]["instanceSize"] + " [" + endpoint["compute"]["accelerator"] + "]",
                    "status": endpoint["status"]["state"],
                    "minReplica": endpoint["compute"]["scaling"]["minReplica"],
                    "maxReplica": endpoint["compute"]["scaling"]["maxReplica"],
                    "createdAt": endpoint["status"]["createdAt"],
                    "updatedAt": endpoint["status"]["updatedAt"],
                }
                for endpoint in endpoints_json["items"]
            ]
            endpoints_df = pd.DataFrame(
                rows,
                columns=["name", "model", "provider", "compute", "status", "minReplica", "maxReplica", "createdAt", "updatedAt"],
            )
            # Rename to the display headers expected by the UI table.
            endpoints_df.columns = ["Endpoint Name", "Model Name @ Revision", "Provider", "Instance Type", "Status", "Min Replica", "Max Replica", "Created At", "Updated At"]
            return gr.DataFrame.update(
                value=endpoints_df,
            )
        else:
            names = [endpoint["name"] for endpoint in endpoints_json["items"]]
            return gr.Dropdown.update(
                choices=names,
                value=names[0] if names else None,
            )
|
|
|
def update_endpoint(
    hf_token_input,
    endpoint_name_input,
    min_node_selector,
    max_node_selector,
    instance_type,
):
    """PUT new compute/scaling settings for an existing endpoint and report the outcome."""
    # instance_type looks like "CPU [medium] · ... · <instanceType>" — pull the
    # bracketed size out of the first segment and the raw type from the last.
    head = instance_type.split("·")[0]
    size = head.split("[")[1].split("]")[0]
    raw_instance_type = instance_type.split("·")[-1].strip()

    body = json.dumps({
        "compute": {
            "instanceSize": size,
            "instanceType": raw_instance_type,
            "scaling": {
                "maxReplica": int(max_node_selector),
                "minReplica": int(min_node_selector)
            }
        }})

    response = requests.put(
        f"https://api.endpoints.huggingface.cloud/endpoint/{endpoint_name_input}",
        headers={
            "Authorization": f"Bearer {hf_token_input.strip()}",
            "Content-Type": "application/json",
        },
        data=body,
    )

    status = response.status_code
    if status == 401:
        return "Invalid token"
    if status == 404:
        return f"Error: {response.text}"
    if status in (202, 200):
        return f"Endpoint {endpoint_name_input} updated successfully."
    return f"Something went wrong! - StatusCode:{status} - Error: {response.text}"
|
|
|
def get_endpoint_logs(
    hf_token_input,
    endpoint_name_input,
):
    """Fetch raw logs for one endpoint; returns log text or an error message."""
    response = requests.get(
        f"https://api.endpoints.huggingface.cloud/endpoint/{endpoint_name_input}/logs",
        headers={
            "Authorization": f"Bearer {hf_token_input.strip()}",
            "Content-Type": "application/json",
        })
    if response.status_code == 401:
        return "Invalid token or No logs found!"
    elif response.status_code == 200:
        return response.text
    elif response.status_code == 404:
        return f"Error: {response.text}"
    else:
        # Fix: previously any other status fell through and returned None,
        # blanking the logs textbox with no explanation.
        return f"Something went wrong! - StatusCode:{response.status_code} - Error: {response.text}"
|
|
|
# ---------------------------------------------------------------------------
# Gradio UI: a tabbed console for managing Hugging Face Inference Endpoints
# (info / deploy / update / delete / logs / static pricing tables).
# NOTE(review): original indentation was lost; nesting reconstructed from the
# component/event structure — confirm against the deployed app.
# ---------------------------------------------------------------------------
with gr.Blocks() as interface:
    gr.Markdown("""
#### Your 🤗 Access Token <span style="color:red;">(Required)</span>
""")
    # Single token field shared by every tab / API call below.
    hf_token_input = gr.Textbox(
        show_label=False,
        placeholder="Your personal/organization Huggingface Access Token. Get it from (https://huggingface.co/settings/tokens)",
        type="password"
    )

    # --- Tab 1: table of all deployed endpoints ----------------------------
    with gr.Tab("Info"):
        gr.Markdown("""
### All Deployed Endpoints
""")
        endpoints_table = gr.Dataframe(
            headers=["Endpoint Name", "Model Name", "Provider", "Instance Type", "Status", "Min Replica", "Max Replica", "Created At", "Updated At"],
            col_count=(9, "fixed"),
        )
        endpoint_info_button = gr.Button(value="Get Info")

    # --- Tab 2: deploy a new endpoint --------------------------------------
    with gr.Tab("Deploy Endpoint"):
        gr.Markdown(
            """
### <br><center style="color:green">(Deploy Your Model on 🤗 Endpoint)</center>
""")
        gr.Markdown("""
#### Endpoint Name
""")
        endpoint_name_input = gr.Textbox(
            show_label=False
        )

        # Header row for the provider/region pickers below.
        with gr.Row():
            gr.Markdown("""
#### Cloud Provider
""")
            gr.Markdown("""
#### Cloud Region
""")

        with gr.Row():
            provider_selector = gr.Dropdown(
                choices=["aws", "azure"],
                value="",
                interactive=True,
                show_label=False,
            )
            # Populated dynamically when a provider is picked (wiring below).
            region_selector = gr.Dropdown(
                [],
                value="",
                interactive=True,
                show_label=False,
            )

        with gr.Row():
            gr.Markdown("""
#### Target Model e.g (openai/whisper-tiny)
""")
            gr.Markdown("""
#### Branch commit hash e.g (ada5a5d516772e41f9aeb0f984df6ecc4620001f)
""")

        with gr.Row():
            repository_selector = gr.Textbox(
                value="",
                interactive=True,
                show_label=False,
            )
            revision_selector = gr.Textbox(
                value="",
                interactive=True,
                show_label=False,
            )

        with gr.Row():
            gr.Markdown("""
#### Task
""")
            gr.Markdown("""
#### Framework
""")

        with gr.Row():
            framework_selector = gr.Dropdown(
                choices = ["Custom", "Diffusers", "Transformers"],
                value="",
                interactive=True,
                show_label=False,
            )
            # Task list depends on framework (see update_task_options wiring).
            task_selector = gr.Dropdown(
                [],
                value="",
                interactive=True,
                show_label=False,
            )

        gr.Markdown("""

#### Select Compute Instance Type
""")
        # Populated from provider+region (see update_compute_options wiring).
        compute_selector = gr.Dropdown(
            [],
            value="",
            interactive=True,
            show_label=False,
        )

        with gr.Row():
            gr.Markdown("""
#### Min Number of Nodes
""")
            gr.Markdown("""
#### Max Number of Nodes
""")
            gr.Markdown("""
#### Security Level
""")

        with gr.Row():
            min_node_selector = gr.Number(
                value=1,
                interactive=True,
                show_label=False,
            )
            max_node_selector = gr.Number(
                value=1,
                interactive=True,
                show_label=False,
            )
            security_selector = gr.Radio(
                choices=["Protected", "Public"],
                value="Protected",
                interactive=True,
                show_label=False,
            )
        submit_button = gr.Button(
            value="Submit",
        )
        # Result/status message from submit().
        status_txt = gr.Textbox(
            value="status",
            interactive=False
        )

    # --- Tab 3: update scaling / instance of an existing endpoint ----------
    with gr.Tab("Update Endpoint"):
        gr.Markdown("""
### <br><center style="color:green">(Update 🔁 Endpoint)</center>
""")
        update_endpoint_info_button = gr.Button(value="Load Endpoints 🔃")
        with gr.Row():
            gr.Markdown("""
#### Cloud Provider
""")
            gr.Markdown("""
#### Cloud Region
""")

        with gr.Row():
            update_provider_selector = gr.Dropdown(
                choices=["aws", "azure"],
                value="",
                interactive=True,
                show_label=False,
            )
            update_region_selector = gr.Dropdown(
                [],
                value="",
                interactive=True,
                show_label=False,
            )

        with gr.Row():
            gr.Markdown("""
#### Endpoint Name
""")
            gr.Markdown("""
#### Instance Type
""")

        with gr.Row():
            # Filled by the "Load Endpoints" button via get_all_endpoints("update").
            update_endpoint_name_input = gr.Dropdown(
                [],
                value="",
                show_label=False
            )
            update_compute_selector = gr.Dropdown(
                [],
                value="",
                interactive=True,
                show_label=False,
            )
        with gr.Row():
            gr.Markdown("""
#### Min Number of Nodes
""")
            gr.Markdown("""
#### Max Number of Nodes
""")
        with gr.Row():
            update_min_node_input = gr.Number(
                value=1,
                interactive=True,
                show_label=False,
            )
            update_max_node_input = gr.Number(
                value=1,
                interactive=True,
                show_label=False,
            )
        update_button = gr.Button(
            value="Update",
        )
        update_status_txt = gr.Textbox(
            value="status",
            interactive=False
        )

    # --- Tab 4: delete an endpoint -----------------------------------------
    with gr.Tab("Delete Endpoint"):
        gr.Markdown("""
### <br><center style="color:green">(Delete 🗑️ Endpoint)</center>
""")
        delete_endpoint_info_button = gr.Button(value="Load Endpoints 🔃")
        gr.Markdown("""
#### Endpoint Name
""")
        delete_endpoint_name_input = gr.Dropdown(
            [],
            value="",
            show_label=False
        )
        delete_button = gr.Button(
            value="Delete",
        )
        delete_status_txt = gr.Textbox(
            value="status",
            interactive=False
        )

    # --- Tab 5: view logs for one endpoint ---------------------------------
    with gr.Tab("Endpoint Logs"):
        gr.Markdown("""
### <br><center style="color:green">(Endpoint 📖 Logs)</center>
""")
        endpoint_logs_load_button = gr.Button(value="Load Endpoints 🔃")
        gr.Markdown("""
#### Endpoint Name
""")
        endpoint_logs_selector = gr.Dropdown(
            [],
            value="",
            show_label=False
        )
        endpoint_logs = gr.Textbox(
            value="",
            interactive=False
        )
        endpoint_logs_button = gr.Button(value="Get Logs")

    # --- Tab 6: static pricing reference (hard-coded snapshot) -------------
    with gr.Tab("Pricing Table"):
        gr.Markdown("""
### <br><center style="color:green">(Instance Pricing Table)</center>
#### Pricing Table(CPU) - 2023/2/22
""")
        gr.Dataframe(
            headers=["provider", "size", "$/h", "vCPUs", "Memory", "Architecture"],
            datatype=["str", "str", "str", "number", "str", "str"],
            row_count=8,
            col_count=(6, "fixed"),
            value=[
                ["aws", "small", "$0.06", 1, "2GB", "Intel Xeon - Ice Lake"],
                ["aws", "medium", "$0.12", 2, "4GB", "Intel Xeon - Ice Lake"],
                ["aws", "large", "$0.24", 4, "8GB", "Intel Xeon - Ice Lake"],
                ["aws", "xlarge", "$0.48", 8, "16GB", "Intel Xeon - Ice Lake"],
                ["azure", "small", "$0.06", 1, "2GB", "Intel Xeon"],
                ["azure", "medium", "$0.12", 2, "4GB", "Intel Xeon"],
                ["azure", "large", "$0.24", 4, "8GB", "Intel Xeon"],
                ["azure", "xlarge", "$0.48", 8, "16GB", "Intel Xeon"],
            ]
        )

        gr.Markdown("""
#### Pricing Table(GPU) - 2023/2/22
""")

        gr.Dataframe(
            headers=["provider", "size", "$/h", "GPUs", "Memory", "Architecture"],
            datatype=["str", "str", "str", "number", "str", "str"],
            row_count=6,
            col_count=(6, "fixed"),
            value=[
                ["aws", "small", "$0.60", 1, "14GB", "NVIDIA T4"],
                ["aws", "medium", "$1.30", 1, "24GB", "NVIDIA A10G"],
                ["aws", "large", "$4.50", 4, "56GB", "NVIDIA T4"],
                ["aws", "xlarge", "$6.50", 1, "80GB", "NVIDIA A100"],
                ["aws", "xxlarge", "$7.00", 4, "96GB", "NVIDIA A10G"],
                ["aws", "xxxlarge", "$45.0", 8, "640GB", "NVIDIA A100"],
            ]
        )

    # --- Event wiring -------------------------------------------------------
    # A hidden TextArea smuggles the "method" string ("info"/"update"/...)
    # into get_all_endpoints so one handler serves several tabs.
    endpoint_info_button.click(
        get_all_endpoints,
        inputs=[hf_token_input, gr.TextArea(value="info", interactive=False, visible=False)],
        outputs=endpoints_table
    )

    # Deploy tab: cascade framework -> tasks, provider -> regions -> compute.
    framework_selector.change(update_task_options, inputs=framework_selector, outputs=task_selector)
    provider_selector.change(update_regions, inputs=provider_selector, outputs=region_selector)
    region_selector.change(update_compute_options, inputs=[provider_selector, region_selector], outputs=compute_selector)
    submit_button.click(
        submit,
        inputs=[
            hf_token_input,
            endpoint_name_input,
            provider_selector,
            region_selector,
            repository_selector,
            revision_selector,
            task_selector,
            framework_selector,
            compute_selector,
            min_node_selector,
            max_node_selector,
            security_selector],
        outputs=status_txt)

    # Update tab wiring (same cascade pattern as deploy).
    update_endpoint_info_button.click(
        get_all_endpoints,
        inputs=[hf_token_input, gr.TextArea(value="update", interactive=False, visible=False)],
        outputs=update_endpoint_name_input
    )
    update_provider_selector.change(update_regions, inputs=update_provider_selector, outputs=update_region_selector)
    update_region_selector.change(update_compute_options, inputs=[update_provider_selector, update_region_selector], outputs=update_compute_selector)
    update_button.click(
        update_endpoint,
        inputs=[
            hf_token_input,
            update_endpoint_name_input,
            update_min_node_input,
            update_max_node_input,
            update_compute_selector
        ],
        outputs=update_status_txt
    )

    # Delete tab wiring.
    delete_endpoint_info_button.click(
        get_all_endpoints,
        inputs=[hf_token_input, gr.TextArea(value="delete", interactive=False, visible=False)],
        outputs=delete_endpoint_name_input
    )
    delete_button.click(
        delete_endpoint,
        inputs=[
            hf_token_input,
            delete_endpoint_name_input
        ],
        outputs=delete_status_txt
    )

    # Logs tab wiring.
    endpoint_logs_load_button.click(
        get_all_endpoints,
        inputs=[hf_token_input, gr.TextArea(value="logs", interactive=False, visible=False)],
        outputs=endpoint_logs_selector
    )
    endpoint_logs_button.click(
        get_endpoint_logs,
        inputs=[
            hf_token_input,
            endpoint_logs_selector
        ],
        outputs=endpoint_logs
    )

interface.launch()