"""Gradio front-end that forwards prompts to a cnvrg.io serving endpoint.

The endpoint host and API key are read from the ``cnvrg_url`` and
``cnvrg_token`` environment variables when the module is loaded; a missing
variable raises ``KeyError`` immediately. The UI is launched at import time.
"""
import http.client
import json
import os
import time

import gradio as gr

url = os.environ['cnvrg_url']
token = os.environ['cnvrg_token']

# Path of the serving endpoint on the host named by ``cnvrg_url``.
ENDPOINT_PATH = "/api/v1/endpoints/nkjpkfaw14saekgsvudy"


def generate_output(prompt, maxtokens=20):
    """Send *prompt* to the serving endpoint and return its prediction.

    The request body is a JSON object whose ``input_params`` field is the
    string ``"<maxtokens>#<prompt>"``.

    Args:
        prompt: Text entered by the user.
        maxtokens: Value of the "Max New Tokens" slider; it is placed before
            the ``#`` separator in ``input_params``.

    Returns:
        A ``(prediction, elapsed)`` tuple, where ``prediction`` is the
        ``"prediction"`` field of the JSON response and ``elapsed`` is the
        request round-trip time in seconds, rounded to three decimals and
        suffixed with ``"s"`` (for example ``"0.412s"``).

    Raises:
        json.JSONDecodeError: If the response body is not valid JSON.
        KeyError: If the response JSON has no ``"prediction"`` field.
    """
    # json.dumps escapes quotes, backslashes and newlines in the prompt, and
    # (with the default ensure_ascii=True) emits pure ASCII, so http.client's
    # ISO-8859-1 encoding of str bodies cannot fail on non-Latin-1 text.
    body = json.dumps({"input_params": f"{maxtokens}#{prompt}"})
    headers = {
        'Cnvrg-Api-Key': token,
        'Content-Type': "application/json",
    }

    conn = http.client.HTTPSConnection(url, 443)
    try:
        # perf_counter is monotonic, so the measured latency cannot be
        # skewed by wall-clock adjustments.
        tic = time.perf_counter()
        conn.request("POST", ENDPOINT_PATH, body, headers)
        res = conn.getresponse()
        data = res.read()
        toc = time.perf_counter()
    finally:
        # Always release the socket, including when the request fails.
        conn.close()

    json_data = json.loads(data.decode("utf-8"))
    return json_data["prediction"], str(round(toc - tic, 3)) + "s"


# UI components: a prompt box and a token-budget slider feed generate_output;
# its two return values populate the response box and the timing label.
input_text = gr.inputs.Textbox(label="Enter Prompt")
slider = gr.inputs.Slider(minimum=1, maximum=75, step=1, default=20, label="Max New Tokens")
output_text = gr.outputs.Textbox(label="Response")
time_text = gr.outputs.Label(label="Time Taken")

title = "SPR - Cnvrg.io | Serving"
description = "Model - Falcon 7B (Instruct) - FP32"

gr.Interface(
    fn=generate_output,
    inputs=[input_text, slider],
    outputs=[output_text, time_text],
    title=title,
    description=description,
).launch()