Tcid

Running

App Files Files Community

Abdennacer Badaoui committed 14 days ago

Commit

27f5ac2

1 Parent(s): 84cf73f

first seen date of a failing test

Browse files

Files changed (3) hide show

app.py +2 -2
data.py +53 -2
model_page.py +34 -12

app.py CHANGED Viewed

@@ -490,7 +490,7 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), js=js_func)
         else:
             # Switch to current mode: show model if selected; otherwise summary
             if last_selected_model and Ci_results.df is not None and not Ci_results.df.empty and last_selected_model in Ci_results.df.index:
-                fig, amd_txt, nvidia_txt = plot_model_stats(Ci_results.df, last_selected_model)
                 return (
                     gr.update(visible=True),    # current_view
                     gr.update(visible=False),   # historical_view
@@ -577,7 +577,7 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), js=js_func)
                 gr.update(visible=True),     # time_series_detail_view
             selected_model, True)            # selected_model_state, in_model_view_state
         else:
-            fig, amd_txt, nvidia_txt = plot_model_stats(Ci_results.df, selected_model)
             return (
                 fig,
                 amd_txt,

         else:
             # Switch to current mode: show model if selected; otherwise summary
             if last_selected_model and Ci_results.df is not None and not Ci_results.df.empty and last_selected_model in Ci_results.df.index:
+                fig, amd_txt, nvidia_txt = plot_model_stats(Ci_results.df, last_selected_model, Ci_results.all_historical_data)
                 return (
                     gr.update(visible=True),    # current_view
                     gr.update(visible=False),   # historical_view
                 gr.update(visible=True),     # time_series_detail_view
             selected_model, True)            # selected_model_state, in_model_view_state
         else:
+            fig, amd_txt, nvidia_txt = plot_model_stats(Ci_results.df, selected_model, Ci_results.all_historical_data)
             return (
                 fig,
                 amd_txt,

data.py CHANGED Viewed

@@ -8,8 +8,8 @@ import json
 import re
 from typing import List, Tuple, Optional
-# NOTE: if caching is an issue, try adding `use_listings_cache=False`
-fs = HfFileSystem()
 IMPORTANT_MODELS = [
     "auto",
@@ -375,6 +375,57 @@ def get_fake_historical_data(start_date: str, end_date: str) -> pd.DataFrame:
 def safe_extract(row: pd.DataFrame, key: str) -> int:
     return int(row.get(key, 0)) if pd.notna(row.get(key, 0)) else 0
 def extract_model_data(row: pd.Series) -> tuple[dict[str, int], dict[str, int], int, int, int, int]:
     """Extract and process model data from DataFrame row."""
     # Handle missing values and get counts directly from dataframe

 import re
 from typing import List, Tuple, Optional
+# NOTE: Disable caching to ensure fresh data on each request
+fs = HfFileSystem(use_listings_cache=False)
 IMPORTANT_MODELS = [
     "auto",
 def safe_extract(row: pd.DataFrame, key: str) -> int:
     return int(row.get(key, 0)) if pd.notna(row.get(key, 0)) else 0
def find_failure_first_seen(historical_df: pd.DataFrame, model_name: str, test_name: str, device: str, gpu_type: str) -> Optional[str]:
    """Find the earliest date on which a given test failure appears.

    Rows whose index equals the lower-cased ``model_name`` are scanned in
    ascending ``date`` order. For each row the ``failures_<device>`` cell is
    read; it may be a dict or a JSON string encoding one, keyed by GPU type
    and holding a list of dicts that carry the test id under ``"line"``.

    Args:
        historical_df: Historical results, one row per model and date.
        model_name: Model to look up (compared case-insensitively by
            lower-casing it before matching the index).
        test_name: Full test identifier to match against each ``"line"``.
        device: Suffix of the failures column, e.g. ``"amd"``.
        gpu_type: Key inside the failures payload, e.g. ``"single"``.

    Returns:
        The ``date`` value of the first matching row, or ``None`` when there
        is no history, no match, or an unexpected error occurs.
    """
    if historical_df is None or historical_df.empty:
        return None

    failures_column = f"failures_{device}"
    try:
        model_rows = historical_df[historical_df.index == model_name.lower()]
        if model_rows.empty or failures_column not in model_rows.columns:
            return None

        # Oldest first, so the first hit is the first time the failure was seen.
        for _, row in model_rows.sort_values("date").iterrows():
            failures = row[failures_column]

            # The payload may have been stored as a JSON string.
            if isinstance(failures, str):
                try:
                    failures = json.loads(failures)
                except ValueError:
                    # Malformed JSON for this date: skip it, keep searching.
                    continue

            # NaN/None (no data for that date) or any unexpected payload type.
            if not isinstance(failures, dict):
                continue

            for test in failures.get(gpu_type) or ():
                if isinstance(test, dict) and test.get("line", "") == test_name:
                    return row.get("date", None)
        return None
    except Exception as e:
        # Best-effort lookup: the dashboard must still render without the date.
        logger.error(f"Error finding first seen date for {test_name}: {e}")
        return None
 def extract_model_data(row: pd.Series) -> tuple[dict[str, int], dict[str, int], int, int, int, int]:
     """Extract and process model data from DataFrame row."""
     # Handle missing values and get counts directly from dataframe

model_page.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import matplotlib.pyplot as plt
 import pandas as pd
 from utils import generate_underlined_line
-from data import extract_model_data
 # Figure dimensions
 FIGURE_WIDTH_DUAL = 18
@@ -85,7 +85,7 @@ def _create_pie_chart(ax: plt.Axes, device_label: str, filtered_stats: dict) ->
                 pad=DEVICE_TITLE_PAD, color=TITLE_COLOR, fontfamily='monospace')
-def plot_model_stats(df: pd.DataFrame, model_name: str) -> tuple[plt.Figure, str, str]:
     """Draws pie charts of model's passed, failed, skipped, and error stats for AMD and NVIDIA."""
     # Handle case where the dataframe is empty or the model name could not be found in it
     if df.empty or model_name not in df.index:
@@ -135,14 +135,14 @@ def plot_model_stats(df: pd.DataFrame, model_name: str) -> tuple[plt.Figure, str
     plt.tight_layout()
     plt.subplots_adjust(top=SUBPLOT_TOP, wspace=SUBPLOT_WSPACE)
-    amd_failed_info = prepare_textbox_content(failures_amd, 'AMD', bool(amd_filtered))
-    nvidia_failed_info = prepare_textbox_content(failures_nvidia, 'NVIDIA', bool(nvidia_filtered))
     return fig, amd_failed_info, nvidia_failed_info
-def prepare_textbox_content(failures: dict[str, list], device: str, data_available: bool) -> str:
-    """Extract failure information from failures object."""
     # Catch the case where there is no data
     if not data_available:
         return generate_underlined_line(f"No data for {device}")
@@ -160,21 +160,43 @@ def prepare_textbox_content(failures: dict[str, list], device: str, data_availab
         ""
     ]
     # Add single-gpu failures
     if single_failures:
         info_lines.append(generate_underlined_line("Single GPU failures:"))
         for test in single_failures:
-            name = test.get("line", "::*could not find name*")
-            name = name.split("::")[-1]
-            info_lines.append(name)
         info_lines.append("\n")
     # Add multi-gpu failures
     if multi_failures:
         info_lines.append(generate_underlined_line("Multi GPU failures:"))
         for test in multi_failures:
-            name = test.get("line", "::*could not find name*")
-            name = name.split("::")[-1]
-            info_lines.append(name)
     return "\n".join(info_lines)

 import matplotlib.pyplot as plt
 import pandas as pd
 from utils import generate_underlined_line
+from data import extract_model_data, find_failure_first_seen
 # Figure dimensions
 FIGURE_WIDTH_DUAL = 18
                 pad=DEVICE_TITLE_PAD, color=TITLE_COLOR, fontfamily='monospace')
+def plot_model_stats(df: pd.DataFrame, model_name: str, historical_df: pd.DataFrame = None) -> tuple[plt.Figure, str, str]:
     """Draws pie charts of model's passed, failed, skipped, and error stats for AMD and NVIDIA."""
     # Handle case where the dataframe is empty or the model name could not be found in it
     if df.empty or model_name not in df.index:
     plt.tight_layout()
     plt.subplots_adjust(top=SUBPLOT_TOP, wspace=SUBPLOT_WSPACE)
+    amd_failed_info = prepare_textbox_content(failures_amd, 'AMD', bool(amd_filtered), model_name, historical_df)
+    nvidia_failed_info = prepare_textbox_content(failures_nvidia, 'NVIDIA', bool(nvidia_filtered), model_name, historical_df)
     return fig, amd_failed_info, nvidia_failed_info
+def prepare_textbox_content(failures: dict[str, list], device: str, data_available: bool, model_name: str = None, historical_df: pd.DataFrame = None) -> str:
+    """Extract failure information from failures object with first seen dates."""
     # Catch the case where there is no data
     if not data_available:
         return generate_underlined_line(f"No data for {device}")
         ""
     ]
+    # Helper function to format failure line with first seen date
+    def format_failure_line(test: dict, gpu_type: str) -> str:
+        full_name = test.get("line", "::*could not find name*")
+        short_name = full_name.split("::")[-1]
+        # Try to find first seen date if historical data is available
+        if historical_df is not None and model_name is not None and not historical_df.empty:
+            first_seen = find_failure_first_seen(
+                historical_df,
+                model_name,
+                full_name,
+                device.lower(),
+                gpu_type
+            )
+            if first_seen:
+                # Format date as MM-DD-YYYY
+                try:
+                    from datetime import datetime
+                    date_obj = datetime.strptime(first_seen, "%Y-%m-%d")
+                    formatted_date = date_obj.strftime("%m-%d-%Y")
+                    return f"{short_name} (First seen: {formatted_date})"
+                except:
+                    return f"{short_name} (First seen: {first_seen})"
+        return short_name
     # Add single-gpu failures
     if single_failures:
         info_lines.append(generate_underlined_line("Single GPU failures:"))
         for test in single_failures:
+            info_lines.append(format_failure_line(test, "single"))
         info_lines.append("\n")
     # Add multi-gpu failures
     if multi_failures:
         info_lines.append(generate_underlined_line("Multi GPU failures:"))
         for test in multi_failures:
+            info_lines.append(format_failure_line(test, "multi"))
     return "\n".join(info_lines)