"""Gradio demo comparing the cost per request of two AI model services."""

import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd

import models
import theme

# NOTE(review): the literals below contain stray leading/trailing newlines,
# which suggests markup (heading / paragraph / line-break tags) was stripped
# from them at some point. The visible text is kept as-is here; restore the
# markup if the rendered page needs it -- TODO confirm against the original.
text = "\n\nTCO Comparison Calculator"
text2 = "Please note that the cost/request only defines the infrastructure cost for deployment. The labor cost must be added for the whole AI model service deployment TCO."
description = """

In this demo application, we help you compare different AI model services, such as Open source or SaaS solutions, based on the Total Cost of Ownership for their deployment.

Please note that we focus on getting the service up and running, but not the maintenance that follows.

"""
formula = r""" $CR = \frac{CIT\_1K \times IT + COT\_1K \times OT}{1000}$
with:
CR = Cost per Request
CIT_1K = Cost per 1000 Input Tokens
COT_1K = Cost per 1000 Output Tokens
IT = Input Tokens
OT = Output Tokens """

# Suggested (input tokens, output tokens) per request for each use case.
_TOKENS_BY_USE_CASE = {
    "Summarize": (500, 200),
    "Question-Answering": (300, 300),
}
# Applied to every use case not listed above (e.g. "Classification").
_FALLBACK_TOKENS = (50, 10)

# File the bar chart is written to before being handed to the image component.
_CHART_PATH = 'cost_comparison.png'


def on_use_case_change(use_case):
    """Return slider updates with the suggested token counts for *use_case*.

    Returns a pair of ``gr.update`` objects: the first carries the suggested
    number of input tokens, the second the suggested number of output tokens.
    """
    input_tokens, output_tokens = _TOKENS_BY_USE_CASE.get(use_case, _FALLBACK_TOKENS)
    return gr.update(value=input_tokens), gr.update(value=output_tokens)


def _describe_cost_ratio(tco1, tco2, dropdown, dropdown2):
    """Build the sentence comparing the two cost/request values."""
    if tco1 == 0 or tco2 == 0:
        # A ratio is undefined when either cost is zero; describe the
        # situation instead of dividing by zero.
        if tco1 == tco2:
            return "Both solutions have the same cost/request."
        if tco2 == 0:
            return (
                f"The second {dropdown2} service has no cost/request, whereas "
                f"the first {dropdown} service costs ${tco1:.5f} per request."
            )
        return (
            f"The first {dropdown} service has no cost/request, whereas "
            f"the second {dropdown2} service costs ${tco2:.5f} per request."
        )

    r = tco1 / tco2
    if r < 1:
        return f"""The cost/request of the second {dropdown2} service is {1/r:.5f} times more expensive than the one of the first {dropdown} service."""
    if r > 1:
        return f"""The cost/request of the second {dropdown2} service is {r:.5f} times cheaper than the one of the first {dropdown} service."""
    return "Both solutions have the same cost/request."


def _save_cost_bar_chart(services, costs, path):
    """Draw the cost/request bar chart and write it to *path*."""
    fig, ax = plt.subplots(figsize=(6, 4))
    try:
        ax.bar(services, costs, color=['red', 'green'])
        ax.set_xlabel('AI option services', fontsize=10)
        ax.set_ylabel('($) Cost/Request', fontsize=10)
        ax.set_title('Comparison of Cost/Request', fontsize=14)
        fig.tight_layout()
        fig.savefig(path)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly, so
        # close it here to avoid accumulating one figure per button click.
        plt.close(fig)


def compare_info(tco1, tco2, dropdown, dropdown2):
    """Compare two cost/request values as a bar chart and a sentence.

    Args:
        tco1: Cost per request of the first service.
        tco2: Cost per request of the second service.
        dropdown: Name of the first service.
        dropdown2: Name of the second service.

    Returns:
        A ``gr.update`` pointing at the saved chart image, and the comparison
        sentence.
    """
    comparison_result = _describe_cost_ratio(tco1, tco2, dropdown, dropdown2)
    _save_cost_bar_chart([dropdown, dropdown2], [tco1, tco2], _CHART_PATH)
    return gr.update(value=_CHART_PATH), comparison_result


def create_table(tco1, tco2, labor_cost1, labor_cost2, dropdown, dropdown2, latency, latency2):
    """Render an HTML table summarising both services.

    Each service gets one row (labelled with its name) holding its
    cost/request, labor cost and latency values.

    Returns:
        A ``gr.update`` whose value is the styled table as HTML.
    """
    cost_column = "Cost/request ($) "
    labor_column = "Labor Cost ($/month)"
    data = pd.DataFrame(
        [
            [tco1, labor_cost1, latency],
            [tco2, labor_cost2, latency2],
        ],
        index=[dropdown, dropdown2],
        columns=[cost_column, labor_column, "Average latency (s)"],
    )
    data[cost_column] = data[cost_column].apply('{:.5f}'.format)
    data[labor_column] = data[labor_column].apply('{:.0f}'.format)

    cell_style = {
        'background-color': '#ffffff',
        'color': '#000000',
        'border-color': '#e0e0e0',
        'border-width': '1px',
        'border-style': 'solid',
    }
    styled_data = data.style.set_properties(**cell_style).to_html()

    # NOTE(review): the variable name suggests the table used to be wrapped in
    # centering markup that has since been lost -- TODO confirm and restore.
    centered_styled_data = f"\n{styled_data}\n"
    return gr.update(value=centered_styled_data)


def update_plot(tco1, tco2, dropdown, dropdown2, labour_cost1, labour_cost2):
    """Plot the total cost of both services against the number of requests.

    The total for a service is ``cost_per_request * requests + labor_cost``.

    Returns:
        A ``gr.LinePlot`` update that carries the data and makes the plot
        visible.
    """
    # Sampling gets coarser as the request count grows. The ranges do not
    # overlap so that each request count appears exactly once per service.
    request_ranges = (
        list(range(0, 1000, 100))
        + list(range(1000, 10000, 500))
        + list(range(10000, 100000, 1000))
        + list(range(100000, 2000001, 100000))
    )
    costs_tco1 = [tco1 * req + labour_cost1 for req in request_ranges]
    costs_tco2 = [tco2 * req + labour_cost2 for req in request_ranges]

    data = pd.DataFrame(
        {
            "Number of requests": request_ranges * 2,
            "Cost ($)": costs_tco1 + costs_tco2,
            "AI model service": (
                [f"1) {dropdown}"] * len(request_ranges)
                + [f"2) {dropdown2}"] * len(request_ranges)
            ),
        }
    )
    return gr.LinePlot.update(
        data,
        visible=True,
        x="Number of requests",
        y="Cost ($)",
        color="AI model service",
        color_legend_position="bottom",
        title="Set-up TCO for one month",
        height=300,
        width=500,
        tooltip=["Number of requests", "Cost ($)", "AI model service"],
    )


style = theme.Style()

# NOTE(review): the nesting of the layout below was reconstructed from the
# order of the statements; verify it against the rendered page.
with gr.Blocks(theme=style) as demo:
    # The list holds model classes, not instances; each is instantiated only
    # to read its display name.
    model_classes: list[type[models.BaseTCOModel]] = [
        models.OpenAIModelGPT4,
        models.OpenAIModelGPT3_5,
        models.CohereModel,
        models.OpenSourceLlama2Model,
    ]
    model_names = [model_class().get_name() for model_class in model_classes]

    gr.Markdown(value=text)
    gr.Markdown(value=description)

    with gr.Row():
        with gr.Column():
            with gr.Row():
                use_case = gr.Dropdown(
                    ["Summarize", "Question-Answering", "Classification"],
                    value="Question-Answering",
                    label=" Describe your use case ",
                )
            with gr.Accordion("Click here if you want to customize the number of input and output tokens per request", open=False):
                with gr.Row():
                    input_tokens = gr.Slider(
                        minimum=1,
                        maximum=1000,
                        value=300,
                        step=1,
                        label=" Input tokens per request",
                        info="We suggest a value that we believe best suit your use case choice but feel free to adjust",
                        interactive=True,
                    )
                    output_tokens = gr.Slider(
                        minimum=1,
                        maximum=1000,
                        value=300,
                        step=1,
                        label=" Output tokens per request",
                        info="We suggest a value that we believe best suit your use case choice but feel free to adjust",
                        interactive=True,
                    )
                with gr.Row(visible=False):
                    num_users = gr.Number(value=1000, interactive=True, label=" Number of users for your service ")

    use_case.change(on_use_case_change, inputs=use_case, outputs=[input_tokens, output_tokens])

    with gr.Row():
        with gr.Column():
            page1 = models.ModelPage(model_classes)
            dropdown = gr.Dropdown(model_names, interactive=True, label=" First AI service option ")
            with gr.Accordion("Click here for more information on the computation parameters for your first AI service option", open=False):
                page1.render()

        with gr.Column():
            page2 = models.ModelPage(model_classes)
            dropdown2 = gr.Dropdown(model_names, interactive=True, label=" Second AI service option ")
            with gr.Accordion("Click here for more information on the computation parameters for your second AI service option", open=False):
                page2.render()

    dropdown.change(page1.make_model_visible, inputs=[dropdown, use_case], outputs=page1.get_all_components())
    dropdown2.change(page2.make_model_visible, inputs=[dropdown2, use_case], outputs=page2.get_all_components())

    compute_tco_btn = gr.Button("Compute & Compare", size="lg", variant="primary", scale=1)

    # Intermediate results passed from one step of the click chain to the next.
    tco1 = gr.State()
    tco2 = gr.State()
    labor_cost1 = gr.State()
    labor_cost2 = gr.State()
    latency = gr.State()
    latency2 = gr.State()

    with gr.Row():
        with gr.Accordion("Click here to see the cost/request computation formula", open=False):
            tco_formula = gr.Markdown(formula)

    with gr.Row(variant='panel'):
        with gr.Column():
            with gr.Row():
                table = gr.Markdown()
            with gr.Row():
                info = gr.Markdown(text2)
            with gr.Row():
                with gr.Column(scale=1):
                    image = gr.Image()
                    ratio = gr.Markdown()
                with gr.Column(scale=2):
                    plot = gr.LinePlot(visible=False)

    # Compute both services first, then refresh the table, the bar chart with
    # its comparison sentence, and finally the line plot.
    compute_tco_btn.click(
        page1.compute_cost_per_token,
        inputs=page1.get_all_components_for_cost_computing() + [dropdown, input_tokens, output_tokens],
        outputs=[tco1, latency, labor_cost1],
    ).then(
        page2.compute_cost_per_token,
        inputs=page2.get_all_components_for_cost_computing() + [dropdown2, input_tokens, output_tokens],
        outputs=[tco2, latency2, labor_cost2],
    ).then(
        create_table,
        inputs=[tco1, tco2, labor_cost1, labor_cost2, dropdown, dropdown2, latency, latency2],
        outputs=table,
    ).then(
        compare_info,
        inputs=[tco1, tco2, dropdown, dropdown2],
        outputs=[image, ratio],
    ).then(
        update_plot,
        inputs=[tco1, tco2, dropdown, dropdown2, labor_cost1, labor_cost2],
        outputs=plot,
    )

if __name__ == "__main__":
    demo.launch(debug=True)