"""Launch a Gradio demo backed by a model hosted on the Hugging Face Hub.

Running this script builds an interface for the model named by
``MODEL_ID`` and starts the Gradio web server.
"""
import gradio as gr

# Hub identifier handed to Gradio's loader. The "models/" prefix selects a
# Hub model (as opposed to "spaces/" for a hosted Space).
MODEL_ID = "models/microsoft/bloom-deepspeed-inference-fp16"

# `gr.load` is the current loader entry point; older Gradio releases only
# provide the `Interface.load` form, so fall back to it when `gr.load` is
# not available.
_load_from_hub = getattr(gr, "load", None) or gr.Interface.load

demo = _load_from_hub(MODEL_ID)
demo.launch()