alishabhale committed on
Commit 5a948c5 · verified · 1 Parent(s): f6debe6

Updated with DeepSeek model

Files changed (1)
  1. app.py +60 -42
app.py CHANGED
@@ -1,56 +1,74 @@
 import os
 import gradio as gr
 import pandas as pd

 # ✅ Function to analyze CSV data based on accuracy
 def analyze_csv(file):
-    df = pd.read_csv(file.name)  # Read uploaded CSV
-
-    # ✅ Ensure column names are stripped of extra spaces
-    df.columns = df.columns.str.strip()
-
-    # ✅ Validate required columns
-    required_columns = {"Host Name", "Workload", "Threads", "TPS", "95_Percentile_Latency", "CPU Utilization"}
-    if not required_columns.issubset(df.columns):
-        return "Error: Missing one or more required columns in the uploaded CSV."
-
-    # ✅ Convert necessary columns to numeric values (handling errors)
-    numeric_cols = ["TPS", "95_Percentile_Latency", "CPU Utilization"]
-    df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors="coerce")
-
-    # ✅ Handle missing or invalid values by dropping rows with NaN
-    df = df.dropna(subset=numeric_cols)
-
-    # ✅ Prevent division by zero
-    df["95_Percentile_Latency"] = df["95_Percentile_Latency"].replace(0, 1e-6)
-    df["CPU Utilization"] = df["CPU Utilization"].replace(0, 1e-6)
-
-    # ✅ Calculate Accuracy Score
-    df["Accuracy Score"] = df["TPS"] / (df["95_Percentile_Latency"] * df["CPU Utilization"])
-
-    # ✅ Find the best-performing test run
-    best_run = df.loc[df["Accuracy Score"].idxmax()]
-
-    # ✅ Format output
-    result = f"""
-    **Benchmark Analysis Results:**
-    - **Best Performing Test Run:** {best_run["Host Name"]} - {best_run["Workload"]} (Threads: {best_run["Threads"]})
-    - **Accuracy Score:** {best_run["Accuracy Score"]:.4f}
-    - **TPS:** {best_run["TPS"]}
-    - **95th Percentile Latency:** {best_run["95_Percentile_Latency"]} ms
-    - **CPU Utilization:** {best_run["CPU Utilization"]}%
-    """
-
-    return result

 # ✅ Gradio Interface
 iface = gr.Interface(
     fn=analyze_csv,
     inputs=gr.File(label="Upload CSV File"),
     outputs="text",
-    title="Benchmark Accuracy Analyzer",
-    description="Upload a CSV file containing benchmark test results. The tool will calculate accuracy scores and determine the best-performing test run.",
 )

-# ✅ Launch the Gradio app
-iface.launch()
 
 import os
 import gradio as gr
 import pandas as pd
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+
+# ✅ Use DeepSeek Free Model (the Hub id needs a variant suffix: -base or -instruct)
+model_name = "deepseek-ai/deepseek-coder-6.7b-instruct"
+
+# ✅ Load DeepSeek model & tokenizer
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
+pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

 # ✅ Function to analyze CSV data based on accuracy
 def analyze_csv(file):
+    try:
+        df = pd.read_csv(file.name)  # Read uploaded CSV
+
+        # ✅ Ensure column names are stripped of extra spaces
+        df.columns = df.columns.str.strip()
+
+        # ✅ Validate required columns
+        required_columns = {"Run ID", "Latency (ms)", "Throughput (req/sec)", "Memory Usage (GB)", "CPU Utilization (%)"}
+        if not required_columns.issubset(df.columns):
+            return f"Error: Missing one or more required columns. Required: {', '.join(required_columns)}"
+
+        # ✅ Coerce metric columns to numeric and drop rows that fail to parse
+        numeric_cols = ["Latency (ms)", "Throughput (req/sec)", "Memory Usage (GB)", "CPU Utilization (%)"]
+        df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors="coerce")
+        df = df.dropna(subset=numeric_cols)
+
+        # ✅ Avoid division errors (replace zero values in Latency & Memory Usage)
+        df["Latency (ms)"] = df["Latency (ms)"].replace(0, 1e-6)
+        df["Memory Usage (GB)"] = df["Memory Usage (GB)"].replace(0, 1e-6)
+
+        # ✅ Calculate Accuracy Score: Throughput / (Latency * Memory Usage)
+        df["Accuracy Score"] = df["Throughput (req/sec)"] / (df["Latency (ms)"] * df["Memory Usage (GB)"])
+
+        # ✅ Find the best-performing test run
+        best_model = df.loc[df["Accuracy Score"].idxmax()]
+        best_run_id = best_model["Run ID"]
+
+        # ✅ Construct analysis summary
+        summary = f"""
+**🏆 Best Performing Test Run:** `{best_run_id}`
+
+- **Latency:** {best_model["Latency (ms)"]} ms
+- **Throughput:** {best_model["Throughput (req/sec)"]} req/sec
+- **Memory Usage:** {best_model["Memory Usage (GB)"]} GB
+- **CPU Utilization:** {best_model["CPU Utilization (%)"]}%
+- **Accuracy Score:** {best_model["Accuracy Score"]:.6f}
+---
+**📊 Accuracy Ranking Table**
+```plaintext
+{df[["Run ID", "Accuracy Score"]].sort_values(by="Accuracy Score", ascending=False).to_string(index=False)}
+```
+---
+Based on this benchmark, generate insights on why this test run performed best and provide recommendations.
+"""
+
+        # ✅ Generate AI-based insights using DeepSeek; return_full_text=False keeps the prompt out of the completion
+        output = pipe(summary, max_new_tokens=150, do_sample=True, temperature=0.7, return_full_text=False)
+
+        return f"{summary}\n\n### 🤖 AI Insights:\n{output[0]['generated_text']}"
+
+    except Exception as e:
+        return f"⚠️ Error processing CSV: {str(e)}"

 # ✅ Gradio Interface
 iface = gr.Interface(
     fn=analyze_csv,
     inputs=gr.File(label="Upload CSV File"),
     outputs="text",
+    title="Benchmark Analyzer (DeepSeek Free)",
+    description="Upload a benchmark CSV file to analyze test performance based on accuracy."
 )

+iface.launch()
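For anyone sanity-checking the new scoring logic outside the Space: the Accuracy Score reduces to a single pandas expression, throughput divided by the product of latency and memory. A minimal sketch with made-up rows (every value below is hypothetical, not from any real benchmark) shows how the ranking falls out:

```python
import pandas as pd

# Hypothetical benchmark rows, invented purely to illustrate the formula.
sample = pd.DataFrame({
    "Run ID": ["run-1", "run-2", "run-3"],
    "Latency (ms)": [120.0, 95.0, 95.0],
    "Throughput (req/sec)": [850.0, 900.0, 700.0],
    "Memory Usage (GB)": [4.0, 6.0, 3.5],
    "CPU Utilization (%)": [72.0, 88.0, 65.0],
})

# Same formula as app.py: Throughput / (Latency * Memory Usage).
sample["Accuracy Score"] = sample["Throughput (req/sec)"] / (
    sample["Latency (ms)"] * sample["Memory Usage (GB)"]
)

print(sample.sort_values("Accuracy Score", ascending=False).to_string(index=False))
# run-3 comes out on top: 700 / (95 * 3.5) ≈ 2.105,
# vs ≈ 1.771 for run-1 and ≈ 1.579 for run-2.
```

Writing this frame to disk and passing any object with a `.name` attribute into `analyze_csv` exercises the handler directly, since `.name` is the only attribute it reads from the upload.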
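One deployment caveat on the model load above: a 6.7B-parameter model in float16 needs roughly 13-14 GB of GPU memory, which free Spaces hardware typically does not have. A possible workaround is a 4-bit quantized load via transformers' `BitsAndBytesConfig`. This is a sketch assuming a CUDA GPU plus the `bitsandbytes` and `accelerate` packages, not something this commit does:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline

# Hypothetical deployment tweak, not part of this commit.
model_name = "deepseek-ai/deepseek-coder-6.7b-instruct"

# 4-bit quantization: shrinks the ~13 GB float16 footprint to roughly 4-5 GB.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",  # requires accelerate; places layers on the available GPU
)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
```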