# llm-api-latency / main.py
# (HF Space by pandada8; initial commit d6bbae3)
import pandas as pd
import gradio as gr
import plotly.express as px
import polars as pl
# Full benchmark dataset, loaded once at startup: one row per recorded trace
# with model/backend/prompt metadata, timing columns, and the raw streaming
# chunks (`raw_chunks` — a list of structs, see show_plot).
all_data = pl.read_parquet("data.parquet")
def show_plot(event: gr.SelectData, df):
    """Render latency plots and a per-chunk detail table for a clicked trace.

    Parameters
    ----------
    event : gr.SelectData
        Cell-select event from the traces DataFrame; ``event.index[0]`` is
        the clicked row number.
        NOTE(review): this indexes the *displayed* table — if the user has
        narrowed it via ``show_search='filter'`` the row number may not line
        up with ``all_data``; confirm against gradio's SelectData semantics.
    df :
        Current value of the DataFrame component (unused; kept because the
        component is wired as ``inputs=df``).

    Returns
    -------
    tuple
        (cumulative-latency scatter, inter-chunk-delta scatter,
        chunk-level detail table without the row-count column).
    """
    row = event.index[0]
    model = all_data[row, 'model']
    backend = all_data[row, 'backend']
    prompt = all_data[row, 'prompt']
    completion_tokens = all_data[row, 'completion_tokens']
    total_time_used = all_data[row, 'total_time_used']
    # raw_chunks is a list of anonymous structs: field '1' is an elapsed
    # time (/1e3 — presumably µs → ms, TODO confirm units), field '2' the
    # chunk payload. `diff` is the gap between consecutive chunks.
    chunks = (
        all_data[row].select(pl.col('raw_chunks').explode())
        .select(
            latency=pl.col('raw_chunks').struct.field('1') / 1e3,
            payload=pl.col('raw_chunks').struct.field('2'),
        )
        .with_columns(diff=pl.col('latency').diff())
        .with_row_count(name='n')
    )
    # tokens per second; *1e3 implies total_time_used is in ms (same
    # conversion as get_overview).
    tps = completion_tokens / total_time_used * 1e3
    return (
        px.scatter(chunks, x='latency', y='n',
                   title=f'{model} - {backend}<br><sup>{prompt}</sup>',
                   trendline='ols'),
        px.scatter(chunks, x='n', y='diff',
                   title=f'{model} - {backend} {tps:.2f} token/s <br><sup>{prompt}</sup>',
                   trendline='ols'),
        chunks.drop('n'),
    )
def get_overview(model):
    """Hourly throughput (tokens/s) line chart for *model*, one line per backend.

    Buckets each trace's timestamp to the hour and plots completion tokens
    divided by total time (×1e3 — total_time_used is presumably in ms).
    """
    subset = all_data.filter(pl.col('model') == model)
    subset = subset.with_columns(
        date=pl.col('datetime').dt.round('1h'),
        tps=pl.col('completion_tokens') / pl.col('total_time_used') * 1e3,
    )
    return px.line(subset, x='date', y='tps', color='backend', symbol='backend')
# --- Gradio UI -------------------------------------------------------------
with gr.Blocks() as demo:
    # Tab 1: pre-rendered per-model throughput-over-time charts.
    with gr.Tab("overview"):
        gr.Markdown("deepseek-v3 overview")
        gr.Plot(get_overview('deepseek-v3'))
        gr.Markdown('deepseek-r1 overview')
        gr.Plot(get_overview('deepseek-r1'))
    # Tab 2: trace browser — click a row to inspect its streaming chunks.
    with gr.Tab("Traces"):
        with gr.Row():
            # Read-only, searchable listing of all traces.
            df = gr.DataFrame(all_data.select('datetime', 'backend', 'model', 'prompt'), interactive=False, show_search='filter')
        with gr.Row():
            plot = gr.Plot()   # cumulative latency per chunk
            plot2 = gr.Plot()  # inter-chunk latency deltas
        with gr.Row():
            detail_df = gr.DataFrame()  # chunk-level detail table
        # Selecting a cell fills both plots and the detail table for that row.
        df.select(fn=show_plot, inputs=df, outputs=[plot, plot2, detail_df])
if __name__ == "__main__":
    demo.launch()