"""Gradio app for creating, updating, inspecting, and deleting
Hugging Face Inference Endpoints via the management REST API."""

import json

import gradio as gr
import pandas as pd
import requests

# Base URL of the Hugging Face Inference Endpoints management API.
API_BASE = "https://api.endpoints.huggingface.cloud"


def update_task_options(framework):
    """Return a Dropdown update listing the tasks supported by *framework*.

    framework: one of "Custom", "Diffusers", "Transformers" (the choices of
    the framework selector below). Raises KeyError for anything else.
    """
    config = {
        "Custom": ["Custom"],
        "Diffusers": ["text-to-image"],
        "Transformers": [
            "text-classification",
            "zero-shot-classification",
            "token-classification",
            "question-answering",
            "fill-mask",
            "summarization",
            "translation",
            "text2text-generation",
            "text-generation",
            "feature-extraction",
            "image-classification",
            "automatic-speech-recognition",
            "audio-classification",
            "object-detection",
            "image-segmentation",
            "table-question-answering",
        ],
    }
    tasks = config[framework]
    return gr.Dropdown.update(
        choices=tasks,
        value=tasks[0] if tasks else None,
    )


def update_regions(provider):
    """Fetch the regions available for *provider* ("aws"/"azure") and
    return a Dropdown update with "region/label" entries."""
    headers = {"Content-Type": "application/json"}
    response = requests.get(f"{API_BASE}/provider/{provider}/region", headers=headers)
    available_regions = [
        f"{region['region']}/{region['label']}"
        for region in response.json()["items"]
        if region["status"] == "available"
    ]
    return gr.Dropdown.update(
        choices=available_regions,
        value=available_regions[0] if available_regions else None,
    )


def update_compute_options(provider, region):
    """Fetch the compute instances available for *provider*/*region* and
    return a Dropdown update.

    region: a "region/label" string as produced by update_regions(); only
    the part before "/" is sent to the API.
    """
    region = region.split("/")[0]
    headers = {"Content-Type": "application/json"}
    response = requests.get(
        f"{API_BASE}/provider/{provider}/region/{region}/compute",
        headers=headers,
    )
    available_compute_choices = []
    for compute in response.json()["items"]:
        if compute["status"] != "available":
            continue
        accelerator = compute["accelerator"]
        num_accelerators = compute["numAccelerators"]
        memory_gb = compute["memoryGb"].replace("Gi", "GB")
        architecture = compute["architecture"]
        instance_type = compute["instanceType"]
        # Renamed from `type` (shadowed the builtin). CPU instances show
        # vCPU count + memory; accelerated instances show the device count.
        compute_desc = (
            f"{num_accelerators}vCPU {memory_gb} · {architecture}"
            if accelerator == "cpu"
            else f"{num_accelerators}x {architecture}"
        )
        # This exact format is parsed back apart by submit()/update_endpoint().
        available_compute_choices.append(
            f"{accelerator.upper()} [{compute['instanceSize']}] · {compute_desc} · {instance_type}"
        )
    return gr.Dropdown.update(
        choices=available_compute_choices,
        value=available_compute_choices[0] if available_compute_choices else None,
    )


def submit(
    hf_token_input,
    endpoint_name_input,
    provider_selector,
    region_selector,
    repository_selector,
    revision_selector,
    task_selector,
    framework_selector,
    compute_selector,
    min_node_selector,
    max_node_selector,
    security_selector,
):
    """Create a new Inference Endpoint from the form values.

    Returns a human-readable status string for the status textbox.
    """
    # compute_selector looks like
    # "CPU [medium] · 2vCPU 4GB · Intel Xeon · intel-icl" (see
    # update_compute_options); pick it apart again here.
    compute_resources = compute_selector.split("·")
    accelerator = compute_resources[0][:3].strip()
    size_l_index = compute_resources[0].index("[") - 1
    size_r_index = compute_resources[0].index("]")
    # `size` still carries the leading "["; it is stripped via size[1:] below.
    size = compute_resources[0][size_l_index:size_r_index].strip()
    instance_type = compute_resources[-1].strip()

    payload = {
        "accountId": repository_selector.split("/")[0],
        "compute": {
            "accelerator": accelerator.lower(),
            "instanceSize": size[1:],
            "instanceType": instance_type,
            "scaling": {
                "maxReplica": int(max_node_selector),
                "minReplica": int(min_node_selector),
            },
        },
        "model": {
            # NOTE(review): framework is hard-coded to "custom" even though a
            # framework_selector is collected — confirm this is intentional
            # for the huggingface-image deploy path.
            "framework": "custom",
            "image": {"huggingface": {}},
            "repository": repository_selector.lower(),
            "revision": revision_selector,
            "task": task_selector.lower(),
        },
        "name": endpoint_name_input.strip(),
        "provider": {
            "region": region_selector.split("/")[0].lower(),
            "vendor": provider_selector.lower(),
        },
        "type": security_selector.lower(),
    }
    payload = json.dumps(payload)

    headers = {
        "Authorization": f"Bearer {hf_token_input.strip()}",
        "Content-Type": "application/json",
    }
    endpoint_url = f"{API_BASE}/endpoint"
    print(f"Endpoint: {endpoint_url}")
    response = requests.post(endpoint_url, headers=headers, data=payload)

    if response.status_code == 400:
        return f"{response.text}. Malformed data in {payload}"
    elif response.status_code == 401:
        return "Invalid token"
    elif response.status_code == 409:
        return f"Error: {response.text}"
    elif response.status_code == 202:
        return (
            f"Endpoint {endpoint_name_input} created successfully on "
            f"{provider_selector.lower()} using "
            f"{repository_selector.lower()}@{revision_selector}. \n"
            "Please check out the progress at "
            "https://ui.endpoints.huggingface.co/endpoints."
        )
    else:
        return f"Something went wrong! - StatusCode:{response.status_code} - Error: {response.text}"


def delete_endpoint(hf_token_input, endpoint_name_input):
    """Delete the endpoint named *endpoint_name_input*; return a status string."""
    response = requests.delete(
        f"{API_BASE}/endpoint/{endpoint_name_input}",
        headers={
            "Authorization": f"Bearer {hf_token_input.strip()}",
            "Content-Type": "application/json",
        },
    )
    if response.status_code == 401:
        return "Invalid token"
    elif response.status_code == 404:
        return f"Error: {response.text}"
    elif response.status_code in (200, 202):
        return f"Endpoint {endpoint_name_input} deleted successfully."
    else:
        return f"Something went wrong! - StatusCode:{response.status_code} - Error: {response.text}"


def get_all_endpoints(hf_token_input, method):
    """List all endpoints for the token's account.

    method: "info" renders a DataFrame update for the Info tab; any other
    value renders a Dropdown update of endpoint names (update/delete/logs tabs).
    """
    response = requests.get(
        f"{API_BASE}/endpoint",
        headers={
            "Authorization": f"Bearer {hf_token_input.strip()}",
            "Content-Type": "application/json",
        },
    )
    if response.status_code == 401:
        if method == "info":
            return gr.DataFrame.update(
                value=pd.DataFrame({"Error": ["Invalid Token -OR- No Endpoint Found !"]}),
            )
        return gr.Dropdown.update(value="Invalid token or No endpoints found!")
    elif response.status_code == 200:
        items = response.json()["items"]
        if method == "info":
            # DataFrame.append was removed in pandas 2.0 (and was O(n^2));
            # collect plain dicts and build the frame once instead.
            rows = [
                {
                    "name": endpoint["name"],
                    "model": endpoint["model"]["repository"] + "@" + endpoint["model"]["revision"],
                    "provider": endpoint["provider"]["vendor"] + "/" + endpoint["provider"]["region"],
                    "compute": (
                        endpoint["compute"]["instanceType"]
                        + "·"
                        + endpoint["compute"]["instanceSize"]
                        + " ["
                        + endpoint["compute"]["accelerator"]
                        + "]"
                    ),
                    "status": endpoint["status"]["state"],
                    "minReplica": endpoint["compute"]["scaling"]["minReplica"],
                    "maxReplica": endpoint["compute"]["scaling"]["maxReplica"],
                    "createdAt": endpoint["status"]["createdAt"],
                    "updatedAt": endpoint["status"]["updatedAt"],
                }
                for endpoint in items
            ]
            endpoints_df = pd.DataFrame(
                rows,
                columns=[
                    "name", "model", "provider", "compute", "status",
                    "minReplica", "maxReplica", "createdAt", "updatedAt",
                ],
            )
            endpoints_df.columns = [
                "Endpoint Name", "Model Name @ Revision", "Provider",
                "Instance Type", "Status", "Min Replica", "Max Replica",
                "Created At", "Updated At",
            ]
            return gr.DataFrame.update(value=endpoints_df)
        names = [endpoint["name"] for endpoint in items]
        return gr.Dropdown.update(
            choices=names,
            # Guard: the original indexed items[0] unconditionally and raised
            # IndexError when the account had no endpoints.
            value=names[0] if names else None,
        )


def update_endpoint(
    hf_token_input,
    endpoint_name_input,
    min_node_selector,
    max_node_selector,
    instance_type,
):
    """Update the compute size/type and scaling bounds of an existing endpoint.

    instance_type: a choice string in the format produced by
    update_compute_options(); size and type are parsed back out of it.
    """
    payload = {
        "compute": {
            "instanceSize": instance_type.split("·")[0].split("[")[1].split("]")[0],
            "instanceType": instance_type.split("·")[-1].strip(),
            "scaling": {
                "maxReplica": int(max_node_selector),
                "minReplica": int(min_node_selector),
            },
        }
    }
    payload = json.dumps(payload)

    response = requests.put(
        f"{API_BASE}/endpoint/{endpoint_name_input}",
        headers={
            "Authorization": f"Bearer {hf_token_input.strip()}",
            "Content-Type": "application/json",
        },
        data=payload,
    )
    if response.status_code == 401:
        return "Invalid token"
    elif response.status_code == 404:
        return f"Error: {response.text}"
    elif response.status_code in (200, 202):
        return f"Endpoint {endpoint_name_input} updated successfully."
    else:
        return f"Something went wrong! - StatusCode:{response.status_code} - Error: {response.text}"


def get_endpoint_logs(hf_token_input, endpoint_name_input):
    """Fetch the raw log text of the named endpoint; return it or an error string."""
    response = requests.get(
        f"{API_BASE}/endpoint/{endpoint_name_input}/logs",
        headers={
            "Authorization": f"Bearer {hf_token_input.strip()}",
            "Content-Type": "application/json",
        },
    )
    if response.status_code == 401:
        return "Invalid token or No logs found!"
    elif response.status_code == 200:
        return response.text
    elif response.status_code == 404:
        return f"Error: {response.text}"
    else:
        # The original silently returned None here, leaving the textbox blank.
        return f"Something went wrong! - StatusCode:{response.status_code} - Error: {response.text}"


with gr.Blocks() as interface:
    gr.Markdown("""
    #### Your 🤗 Access Token (Required)
    """)
    hf_token_input = gr.Textbox(
        show_label=False,
        placeholder="Your personal/organization Huggingface Access Token. Get it from (https://huggingface.co/settings/tokens)",
        type="password",
    )

    # Get All Endpoints Info
    with gr.Tab("Info"):
        gr.Markdown("""
        ### All Deployed Endpoints
        """)
        endpoints_table = gr.Dataframe(
            headers=[
                "Endpoint Name", "Model Name", "Provider", "Instance Type",
                "Status", "Min Replica", "Max Replica", "Created At", "Updated At",
            ],
            col_count=(9, "fixed"),
        )
        endpoint_info_button = gr.Button(value="Get Info")

    # Deploy Endpoint
    with gr.Tab("Deploy Endpoint"):
        gr.Markdown("""
        ### (Deploy Your Model on 🤗 Endpoint)
        """)
        gr.Markdown("""
        #### Endpoint Name
        """)
        endpoint_name_input = gr.Textbox(show_label=False)

        with gr.Row():
            gr.Markdown("""
            #### Cloud Provider
            """)
            gr.Markdown("""
            #### Cloud Region
            """)
        with gr.Row():
            provider_selector = gr.Dropdown(
                choices=["aws", "azure"],
                value="",
                interactive=True,
                show_label=False,
            )
            region_selector = gr.Dropdown(
                [],
                value="",
                interactive=True,
                show_label=False,
            )

        with gr.Row():
            gr.Markdown("""
            #### Target Model e.g (openai/whisper-tiny)
            """)
            gr.Markdown("""
            #### Branch commit hash e.g (ada5a5d516772e41f9aeb0f984df6ecc4620001f)
            """)
        with gr.Row():
            repository_selector = gr.Textbox(
                value="",
                interactive=True,
                show_label=False,
            )
            revision_selector = gr.Textbox(
                value="",
                interactive=True,
                show_label=False,
            )

        with gr.Row():
            gr.Markdown("""
            #### Task
            """)
            gr.Markdown("""
            #### Framework
            """)
        with gr.Row():
            framework_selector = gr.Dropdown(
                choices=["Custom", "Diffusers", "Transformers"],
                value="",
                interactive=True,
                show_label=False,
            )
            task_selector = gr.Dropdown(
                [],
                value="",
                interactive=True,
                show_label=False,
            )

        gr.Markdown("""
        #### Select Compute Instance Type
        """)
        compute_selector = gr.Dropdown(
            [],
            value="",
            interactive=True,
            show_label=False,
        )

        with gr.Row():
            gr.Markdown("""
            #### Min Number of Nodes
            """)
            gr.Markdown("""
            #### Max Number of Nodes
            """)
            gr.Markdown("""
            #### Security Level
            """)
        with gr.Row():
            min_node_selector = gr.Number(
                value=1,
                interactive=True,
                show_label=False,
            )
            max_node_selector = gr.Number(
                value=1,
                interactive=True,
                show_label=False,
            )
            security_selector = gr.Radio(
                choices=["Protected", "Public"],
                value="Protected",
                interactive=True,
                show_label=False,
            )

        submit_button = gr.Button(value="Submit")
        status_txt = gr.Textbox(value="status", interactive=False)

    # Update Endpoint
    with gr.Tab("Update Endpoint"):
        gr.Markdown("""
        ### (Update 🔁 Endpoint)
        """)
        update_endpoint_info_button = gr.Button(value="Load Endpoints 🔃")

        with gr.Row():
            gr.Markdown("""
            #### Cloud Provider
            """)
            gr.Markdown("""
            #### Cloud Region
            """)
        with gr.Row():
            update_provider_selector = gr.Dropdown(
                choices=["aws", "azure"],
                value="",
                interactive=True,
                show_label=False,
            )
            update_region_selector = gr.Dropdown(
                [],
                value="",
                interactive=True,
                show_label=False,
            )

        with gr.Row():
            gr.Markdown("""
            #### Endpoint Name
            """)
            gr.Markdown("""
            #### Instance Type
            """)
        with gr.Row():
            update_endpoint_name_input = gr.Dropdown(
                [],
                value="",
                show_label=False,
            )
            update_compute_selector = gr.Dropdown(
                [],
                value="",
                interactive=True,
                show_label=False,
            )

        with gr.Row():
            gr.Markdown("""
            #### Min Number of Nodes
            """)
            gr.Markdown("""
            #### Max Number of Nodes
            """)
        with gr.Row():
            update_min_node_input = gr.Number(
                value=1,
                interactive=True,
                show_label=False,
            )
            update_max_node_input = gr.Number(
                value=1,
                interactive=True,
                show_label=False,
            )

        update_button = gr.Button(value="Update")
        update_status_txt = gr.Textbox(value="status", interactive=False)

    # Delete Endpoint
    with gr.Tab("Delete Endpoint"):
        gr.Markdown("""
        ### (Delete 🗑️ Endpoint)
        """)
        delete_endpoint_info_button = gr.Button(value="Load Endpoints 🔃")
        gr.Markdown("""
        #### Endpoint Name
        """)
        delete_endpoint_name_input = gr.Dropdown(
            [],
            value="",
            show_label=False,
        )
        delete_button = gr.Button(value="Delete")
        delete_status_txt = gr.Textbox(value="status", interactive=False)

    # Endpoint logs
    with gr.Tab("Endpoint Logs"):
        gr.Markdown("""
        ### (Endpoint 📖 Logs)
        """)
        endpoint_logs_load_button = gr.Button(value="Load Endpoints 🔃")
        gr.Markdown("""
        #### Endpoint Name
        """)
        endpoint_logs_selector = gr.Dropdown(
            [],
            value="",
            show_label=False,
        )
        endpoint_logs = gr.Textbox(value="", interactive=False)
        endpoint_logs_button = gr.Button(value="Get Logs")

    # Pricing Table
    with gr.Tab("Pricing Table"):
        gr.Markdown("""
        ### (Instance Pricing Table)
        #### Pricing Table(CPU) - 2023/2/22
        """)
        gr.Dataframe(
            headers=["provider", "size", "$/h", "vCPUs", "Memory", "Architecture"],
            datatype=["str", "str", "str", "number", "str", "str"],
            row_count=8,
            col_count=(6, "fixed"),
            value=[
                ["aws", "small", "$0.06", 1, "2GB", "Intel Xeon - Ice Lake"],
                ["aws", "medium", "$0.12", 2, "4GB", "Intel Xeon - Ice Lake"],
                ["aws", "large", "$0.24", 4, "8GB", "Intel Xeon - Ice Lake"],
                ["aws", "xlarge", "$0.48", 8, "16GB", "Intel Xeon - Ice Lake"],
                ["azure", "small", "$0.06", 1, "2GB", "Intel Xeon"],
                ["azure", "medium", "$0.12", 2, "4GB", "Intel Xeon"],
                ["azure", "large", "$0.24", 4, "8GB", "Intel Xeon"],
                ["azure", "xlarge", "$0.48", 8, "16GB", "Intel Xeon"],
            ],
        )
        gr.Markdown("""
        #### Pricing Table(GPU) - 2023/2/22
        """)
        gr.Dataframe(
            headers=["provider", "size", "$/h", "GPUs", "Memory", "Architecture"],
            datatype=["str", "str", "str", "number", "str", "str"],
            row_count=6,
            col_count=(6, "fixed"),
            value=[
                ["aws", "small", "$0.60", 1, "14GB", "NVIDIA T4"],
                ["aws", "medium", "$1.30", 1, "24GB", "NVIDIA A10G"],
                ["aws", "large", "$4.50", 4, "56GB", "NVIDIA T4"],
                ["aws", "xlarge", "$6.50", 1, "80GB", "NVIDIA A100"],
                ["aws", "xxlarge", "$7.00", 4, "96GB", "NVIDIA A10G"],
                ["aws", "xxxlarge", "$45.0", 8, "640GB", "NVIDIA A100"],
            ],
        )

    # Info Tab Events
    endpoint_info_button.click(
        get_all_endpoints,
        inputs=[hf_token_input, gr.TextArea(value="info", interactive=False, visible=False)],
        outputs=endpoints_table,
    )

    # Deploy Tab Events
    framework_selector.change(update_task_options, inputs=framework_selector, outputs=task_selector)
    provider_selector.change(update_regions, inputs=provider_selector, outputs=region_selector)
    region_selector.change(
        update_compute_options,
        inputs=[provider_selector, region_selector],
        outputs=compute_selector,
    )
    submit_button.click(
        submit,
        inputs=[
            hf_token_input,
            endpoint_name_input,
            provider_selector,
            region_selector,
            repository_selector,
            revision_selector,
            task_selector,
            framework_selector,
            compute_selector,
            min_node_selector,
            max_node_selector,
            security_selector,
        ],
        outputs=status_txt,
    )

    # Update Tab Events
    update_endpoint_info_button.click(
        get_all_endpoints,
        inputs=[hf_token_input, gr.TextArea(value="update", interactive=False, visible=False)],
        outputs=update_endpoint_name_input,
    )
    update_provider_selector.change(
        update_regions,
        inputs=update_provider_selector,
        outputs=update_region_selector,
    )
    update_region_selector.change(
        update_compute_options,
        inputs=[update_provider_selector, update_region_selector],
        outputs=update_compute_selector,
    )
    update_button.click(
        update_endpoint,
        inputs=[
            hf_token_input,
            update_endpoint_name_input,
            update_min_node_input,
            update_max_node_input,
            update_compute_selector,
        ],
        outputs=update_status_txt,
    )

    # Delete Tab Events
    delete_endpoint_info_button.click(
        get_all_endpoints,
        inputs=[hf_token_input, gr.TextArea(value="delete", interactive=False, visible=False)],
        outputs=delete_endpoint_name_input,
    )
    delete_button.click(
        delete_endpoint,
        inputs=[hf_token_input, delete_endpoint_name_input],
        outputs=delete_status_txt,
    )

    # Endpoint Logs Tab Events
    endpoint_logs_load_button.click(
        get_all_endpoints,
        inputs=[hf_token_input, gr.TextArea(value="logs", interactive=False, visible=False)],
        outputs=endpoint_logs_selector,
    )
    endpoint_logs_button.click(
        get_endpoint_logs,
        inputs=[hf_token_input, endpoint_logs_selector],
        outputs=endpoint_logs,
    )

# interface.queue(concurrency_count=5)
interface.launch()