"""Gradio dashboard for browsing LLM backend benchmark traces.

Loads ``data.parquet`` once at import time and builds a two-tab UI:

* "overview" - throughput (labelled token/s) over time per backend for the
  ``deepseek-v3`` and ``deepseek-r1`` models.
* "Traces"   - a table of individual requests; selecting a row plots the
  per-chunk arrival latency of that request.
"""
import gradio as gr
import pandas as pd
import plotly.express as px
import polars as pl

# Full benchmark table; every callback below reads from this frame.
all_data = pl.read_parquet("data.parquet")


def show_plot(event: gr.SelectData, df):
    """Build the detail plots for the row selected in the traces table.

    Args:
        event: Gradio selection event. ``event.index[0]`` is used as the row
            position inside ``all_data``.
        df: Current value of the traces table as supplied by Gradio. Unused,
            but kept because the event is wired with ``inputs=df``.

    Returns:
        A 3-tuple ``(latency_fig, diff_fig, chunks)``:
        a scatter of chunk number vs. latency, a scatter of inter-chunk
        latency difference vs. chunk number, and the chunk table itself
        (without the helper ``n`` column).
    """
    row = event.index[0]
    # NOTE(review): the index refers to the table as displayed; with
    # show_search='filter' active it may not line up with all_data rows -
    # confirm against Gradio's behaviour.

    # Debug output for the selection event and the selected record.
    print(event.value)
    print(event.index)
    print(event.target)
    print(all_data[row])

    model = all_data[row, 'model']
    backend = all_data[row, 'backend']
    prompt = all_data[row, 'prompt']
    completion_tokens = all_data[row, 'completion_tokens']
    total_time_used = all_data[row, 'total_time_used']

    # One row per streamed chunk. Struct field '1' is scaled by 1/1e3 into
    # `latency`, field '2' is the chunk payload.
    # NOTE: newer polars releases deprecate with_row_count in favour of
    # with_row_index; kept as-is for compatibility with the installed version.
    chunks = (
        all_data[row]
        .select(pl.col('raw_chunks').explode())
        .select(
            latency=pl.col('raw_chunks').struct.field('1') / 1e3,
            payload=pl.col('raw_chunks').struct.field('2'),
        )
        .with_columns(diff=pl.col('latency').diff())
        .with_row_count(name='n')
    )

    # Guard against a zero/null duration (or null token count) so a single
    # bad record does not crash the callback; NaN is shown in the title.
    if completion_tokens is not None and total_time_used:
        tps = completion_tokens / total_time_used * 1e3
    else:
        tps = float('nan')

    # Plotly titles use <br> for line breaks.
    latency_fig = px.scatter(
        chunks,
        x='latency',
        y='n',
        title=f'{model} - {backend}<br>{prompt}',
        trendline='ols',
    )
    diff_fig = px.scatter(
        chunks,
        x='n',
        y='diff',
        title=f'{model} - {backend} {tps:.2f} token/s<br>{prompt}',
        trendline='ols',
    )
    return latency_fig, diff_fig, chunks.drop('n')


def get_overview(model):
    """Return a line chart of throughput over time for one model.

    Args:
        model: Value matched against the ``model`` column of ``all_data``.

    Returns:
        A Plotly figure with ``date`` (``datetime`` rounded to the hour) on
        the x-axis and ``tps`` on the y-axis, one coloured/symbolised line
        per ``backend``.
    """
    df = (
        all_data
        .filter(pl.col('model') == model)
        .with_columns(
            date=pl.col('datetime').dt.round('1h'),
            tps=pl.col('completion_tokens') / pl.col('total_time_used') * 1e3,
        )
        # px.line joins points in row order; sort so each line runs
        # chronologically. Stable sort keeps the original order of ties.
        .sort('date', maintain_order=True)
    )
    return px.line(df, x='date', y='tps', color='backend', symbol='backend')


with gr.Blocks() as demo:
    with gr.Tab("overview"):
        gr.Markdown("deepseek-v3 overview")
        gr.Plot(get_overview('deepseek-v3'))
        gr.Markdown('deepseek-r1 overview')
        gr.Plot(get_overview('deepseek-r1'))
    with gr.Tab("Traces"):
        with gr.Row():
            # Read-only table of requests; selecting a row drives the plots.
            df = gr.DataFrame(
                all_data.select('datetime', 'backend', 'model', 'prompt'),
                interactive=False,
                show_search='filter',
            )
        with gr.Row():
            plot = gr.Plot()
            plot2 = gr.Plot()
        with gr.Row():
            detail_df = gr.DataFrame()
        df.select(fn=show_plot, inputs=df, outputs=[plot, plot2, detail_df])


if __name__ == "__main__":
    demo.launch()