alishabhale committed on
Commit 5a948c5 · verified · 1 Parent(s): f6debe6

Updated with DeepSeek model

Files changed (1)
  1. app.py +60 -42
app.py CHANGED
@@ -1,56 +1,74 @@
 import os
 import gradio as gr
 import pandas as pd

 # ✅ Function to analyze CSV data based on accuracy
 def analyze_csv(file):
-    df = pd.read_csv(file.name)  # Read uploaded CSV
-
-    # ✅ Ensure column names are stripped of extra spaces
-    df.columns = df.columns.str.strip()
-
-    # ✅ Validate required columns
-    required_columns = {"Host Name", "Workload", "Threads", "TPS", "95_Percentile_Latency", "CPU Utilization"}
-    if not required_columns.issubset(df.columns):
-        return "Error: Missing one or more required columns in the uploaded CSV."
-
-    # ✅ Convert necessary columns to numeric values (handling errors)
-    numeric_cols = ["TPS", "95_Percentile_Latency", "CPU Utilization"]
-    df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors="coerce")
-
-    # ✅ Handle missing or invalid values by dropping rows with NaN
-    df = df.dropna(subset=numeric_cols)
-
-    # ✅ Prevent division by zero
-    df["95_Percentile_Latency"] = df["95_Percentile_Latency"].replace(0, 1e-6)
-    df["CPU Utilization"] = df["CPU Utilization"].replace(0, 1e-6)
-
-    # ✅ Calculate Accuracy Score
-    df["Accuracy Score"] = df["TPS"] / (df["95_Percentile_Latency"] * df["CPU Utilization"])
-
-    # ✅ Find the best-performing test run
-    best_run = df.loc[df["Accuracy Score"].idxmax()]
-
-    # ✅ Format output
-    result = f"""
-    **Benchmark Analysis Results:**
-    - **Best Performing Test Run:** {best_run["Host Name"]} - {best_run["Workload"]} (Threads: {best_run["Threads"]})
-    - **Accuracy Score:** {best_run["Accuracy Score"]:.4f}
-    - **TPS:** {best_run["TPS"]}
-    - **95th Percentile Latency:** {best_run["95_Percentile_Latency"]} ms
-    - **CPU Utilization:** {best_run["CPU Utilization"]}%
-    """
-
-    return result

 # ✅ Gradio Interface
 iface = gr.Interface(
     fn=analyze_csv,
     inputs=gr.File(label="Upload CSV File"),
     outputs="text",
-    title="Benchmark Accuracy Analyzer",
-    description="Upload a CSV file containing benchmark test results. The tool will calculate accuracy scores and determine the best-performing test run.",
 )

-# ✅ Launch the Gradio app
-iface.launch()
 
 import os
 import gradio as gr
 import pandas as pd
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+
+# ✅ Use DeepSeek Free Model (the Hub id needs a variant suffix: -base or -instruct)
+model_name = "deepseek-ai/deepseek-coder-6.7b-instruct"
+
+# ✅ Load DeepSeek model & tokenizer
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
+pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

 # ✅ Function to analyze CSV data based on accuracy
 def analyze_csv(file):
+    try:
+        df = pd.read_csv(file.name)  # Read uploaded CSV
+
+        # ✅ Ensure column names are stripped of extra spaces
+        df.columns = df.columns.str.strip()
+
+        # ✅ Validate required columns
+        required_columns = {"Run ID", "Latency (ms)", "Throughput (req/sec)", "Memory Usage (GB)", "CPU Utilization (%)"}
+        if not required_columns.issubset(df.columns):
+            return f"Error: Missing one or more required columns. Required: {', '.join(required_columns)}"
+
+        # ✅ Coerce metric columns to numeric and drop rows that fail to parse
+        numeric_cols = ["Latency (ms)", "Throughput (req/sec)", "Memory Usage (GB)", "CPU Utilization (%)"]
+        df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors="coerce")
+        df = df.dropna(subset=numeric_cols)
+
+        # ✅ Avoid division errors (replace zero values in Latency & Memory Usage)
+        df["Latency (ms)"] = df["Latency (ms)"].replace(0, 1e-6)
+        df["Memory Usage (GB)"] = df["Memory Usage (GB)"].replace(0, 1e-6)
+
+        # ✅ Calculate Accuracy Score: Throughput / (Latency * Memory Usage)
+        df["Accuracy Score"] = df["Throughput (req/sec)"] / (df["Latency (ms)"] * df["Memory Usage (GB)"])
+
+        # ✅ Find the best-performing test run
+        best_model = df.loc[df["Accuracy Score"].idxmax()]
+        best_run_id = best_model["Run ID"]
+
+        # ✅ Construct analysis summary
+        summary = f"""
+**🏆 Best Performing Test Run:** `{best_run_id}`
+
+- **Latency:** {best_model["Latency (ms)"]} ms
+- **Throughput:** {best_model["Throughput (req/sec)"]} req/sec
+- **Memory Usage:** {best_model["Memory Usage (GB)"]} GB
+- **CPU Utilization:** {best_model["CPU Utilization (%)"]}%
+- **Accuracy Score:** {best_model["Accuracy Score"]:.6f}
+---
+**📊 Accuracy Ranking Table**
+```plaintext
+{df[["Run ID", "Accuracy Score"]].sort_values(by="Accuracy Score", ascending=False).to_string(index=False)}
+```
+---
+Based on this benchmark, generate insights on why this test run performed best and provide recommendations.
+"""
+
+        # ✅ Generate AI-based insights using DeepSeek; return_full_text=False keeps the prompt out of the completion
+        output = pipe(summary, max_new_tokens=150, do_sample=True, temperature=0.7, return_full_text=False)
+
+        return f"{summary}\n\n### 🤖 AI Insights:\n{output[0]['generated_text']}"
+
+    except Exception as e:
+        return f"⚠️ Error processing CSV: {str(e)}"

 # ✅ Gradio Interface
 iface = gr.Interface(
     fn=analyze_csv,
     inputs=gr.File(label="Upload CSV File"),
     outputs="text",
+    title="Benchmark Analyzer (DeepSeek Free)",
+    description="Upload a benchmark CSV file to analyze test performance based on accuracy."
 )

+iface.launch()
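For anyone sanity-checking the new scoring logic outside the Space: the Accuracy Score reduces to a single pandas expression, throughput divided by the product of latency and memory. A minimal sketch with made-up rows (every value below is hypothetical, not from any real benchmark) shows how the ranking falls out:

```python
import pandas as pd

# Hypothetical benchmark rows, invented purely to illustrate the formula.
sample = pd.DataFrame({
    "Run ID": ["run-1", "run-2", "run-3"],
    "Latency (ms)": [120.0, 95.0, 95.0],
    "Throughput (req/sec)": [850.0, 900.0, 700.0],
    "Memory Usage (GB)": [4.0, 6.0, 3.5],
    "CPU Utilization (%)": [72.0, 88.0, 65.0],
})

# Same formula as app.py: Throughput / (Latency * Memory Usage).
sample["Accuracy Score"] = sample["Throughput (req/sec)"] / (
    sample["Latency (ms)"] * sample["Memory Usage (GB)"]
)

print(sample.sort_values("Accuracy Score", ascending=False).to_string(index=False))
# run-3 comes out on top: 700 / (95 * 3.5) ≈ 2.105,
# vs ≈ 1.771 for run-1 and ≈ 1.579 for run-2.
```

Writing this frame to disk and passing any object with a `.name` attribute into `analyze_csv` exercises the handler directly, since `.name` is the only attribute it reads from the upload.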
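One deployment caveat on the model load above: a 6.7B-parameter model in float16 needs roughly 13-14 GB of GPU memory, which free Spaces hardware typically does not have. A possible workaround is a 4-bit quantized load via transformers' `BitsAndBytesConfig`. This is a sketch assuming a CUDA GPU plus the `bitsandbytes` and `accelerate` packages, not something this commit does:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline

# Hypothetical deployment tweak, not part of this commit.
model_name = "deepseek-ai/deepseek-coder-6.7b-instruct"

# 4-bit quantization: shrinks the ~13 GB float16 footprint to roughly 4-5 GB.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",  # requires accelerate; places layers on the available GPU
)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
```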