Abdennacer Badaoui committed on
Commit
27f5ac2
·
1 Parent(s): 84cf73f

first seen date of a failing test

Browse files
Files changed (3) hide show
  1. app.py +2 -2
  2. data.py +53 -2
  3. model_page.py +34 -12
app.py CHANGED
@@ -490,7 +490,7 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), js=js_func)
490
  else:
491
  # Switch to current mode: show model if selected; otherwise summary
492
  if last_selected_model and Ci_results.df is not None and not Ci_results.df.empty and last_selected_model in Ci_results.df.index:
493
- fig, amd_txt, nvidia_txt = plot_model_stats(Ci_results.df, last_selected_model)
494
  return (
495
  gr.update(visible=True), # current_view
496
  gr.update(visible=False), # historical_view
@@ -577,7 +577,7 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), js=js_func)
577
  gr.update(visible=True), # time_series_detail_view
578
  selected_model, True) # selected_model_state, in_model_view_state
579
  else:
580
- fig, amd_txt, nvidia_txt = plot_model_stats(Ci_results.df, selected_model)
581
  return (
582
  fig,
583
  amd_txt,
 
490
  else:
491
  # Switch to current mode: show model if selected; otherwise summary
492
  if last_selected_model and Ci_results.df is not None and not Ci_results.df.empty and last_selected_model in Ci_results.df.index:
493
+ fig, amd_txt, nvidia_txt = plot_model_stats(Ci_results.df, last_selected_model, Ci_results.all_historical_data)
494
  return (
495
  gr.update(visible=True), # current_view
496
  gr.update(visible=False), # historical_view
 
577
  gr.update(visible=True), # time_series_detail_view
578
  selected_model, True) # selected_model_state, in_model_view_state
579
  else:
580
+ fig, amd_txt, nvidia_txt = plot_model_stats(Ci_results.df, selected_model, Ci_results.all_historical_data)
581
  return (
582
  fig,
583
  amd_txt,
data.py CHANGED
@@ -8,8 +8,8 @@ import json
8
  import re
9
  from typing import List, Tuple, Optional
10
 
11
- # NOTE: if caching is an issue, try adding `use_listings_cache=False`
12
- fs = HfFileSystem()
13
 
14
  IMPORTANT_MODELS = [
15
  "auto",
@@ -375,6 +375,57 @@ def get_fake_historical_data(start_date: str, end_date: str) -> pd.DataFrame:
375
  def safe_extract(row: pd.DataFrame, key: str) -> int:
376
  return int(row.get(key, 0)) if pd.notna(row.get(key, 0)) else 0
377
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
378
  def extract_model_data(row: pd.Series) -> tuple[dict[str, int], dict[str, int], int, int, int, int]:
379
  """Extract and process model data from DataFrame row."""
380
  # Handle missing values and get counts directly from dataframe
 
8
  import re
9
  from typing import List, Tuple, Optional
10
 
11
+ # NOTE: Disable caching to ensure fresh data on each request
12
+ fs = HfFileSystem(use_listings_cache=False)
13
 
14
  IMPORTANT_MODELS = [
15
  "auto",
 
375
def safe_extract(row: pd.DataFrame, key: str) -> int:
    """Return the value stored under ``key`` as an int, or 0 when it is missing.

    The value is looked up with ``row.get(key, 0)``, so an absent key yields 0.
    A value for which ``pd.notna`` is false (NaN, None, ...) also yields 0.
    """
    value = row.get(key, 0)
    if pd.notna(value):
        return int(value)
    return 0
377
 
378
+
379
def find_failure_first_seen(historical_df: pd.DataFrame, model_name: str, test_name: str, device: str, gpu_type: str) -> Optional[str]:
    """Find the earliest date on which a specific test failure appears.

    Rows whose index equals the lower-cased ``model_name`` are scanned in
    ascending ``date`` order. For each row the ``failures_<device>`` cell is
    read; it may be a dict or a JSON string encoding one. The first row whose
    ``failures[gpu_type]`` list holds an entry with ``entry["line"] ==
    test_name`` determines the result.

    Args:
        historical_df: Historical results, indexed by lower-case model name,
            with a ``date`` column and ``failures_<device>`` columns.
        model_name: Model to look up (matched case-insensitively).
        test_name: Full test identifier, compared against each entry's ``line``.
        device: Device suffix of the failures column (e.g. ``"amd"``).
        gpu_type: Key inside the failures mapping (e.g. ``"single"``/``"multi"``).

    Returns:
        The ``date`` value of the first matching row, or ``None`` when there
        is no match, no data for the model, or the lookup fails.
    """
    # Function-scope import, done once instead of on every loop iteration.
    import json

    if historical_df is None or historical_df.empty:
        return None

    failures_column = f"failures_{device}"

    try:
        # The index is expected to hold lower-case model names.
        model_rows = historical_df[historical_df.index == model_name.lower()]
        if model_rows.empty or failures_column not in model_rows.columns:
            return None

        # Oldest first, so the first hit is the first occurrence.
        model_rows = model_rows.sort_values("date")

        for date, failures in zip(model_rows["date"], model_rows[failures_column]):
            if failures is None:
                continue

            # The cell may hold the failures mapping serialized as JSON.
            if isinstance(failures, str):
                try:
                    failures = json.loads(failures)
                except (TypeError, ValueError):  # JSONDecodeError is a ValueError
                    continue

            # Skip NaN and any other malformed payload instead of letting it
            # abort the search: later rows may still contain a valid match.
            if not isinstance(failures, dict):
                continue

            for entry in failures.get(gpu_type) or []:
                if isinstance(entry, dict) and entry.get("line", "") == test_name:
                    return date

        return None

    except Exception as e:
        # Best-effort lookup for display purposes: report and fall back to None.
        logger.error(f"Error finding first seen date for {test_name}: {e}")
        return None
427
+
428
+
429
  def extract_model_data(row: pd.Series) -> tuple[dict[str, int], dict[str, int], int, int, int, int]:
430
  """Extract and process model data from DataFrame row."""
431
  # Handle missing values and get counts directly from dataframe
model_page.py CHANGED
@@ -1,7 +1,7 @@
1
  import matplotlib.pyplot as plt
2
  import pandas as pd
3
  from utils import generate_underlined_line
4
- from data import extract_model_data
5
 
6
  # Figure dimensions
7
  FIGURE_WIDTH_DUAL = 18
@@ -85,7 +85,7 @@ def _create_pie_chart(ax: plt.Axes, device_label: str, filtered_stats: dict) ->
85
  pad=DEVICE_TITLE_PAD, color=TITLE_COLOR, fontfamily='monospace')
86
 
87
 
88
- def plot_model_stats(df: pd.DataFrame, model_name: str) -> tuple[plt.Figure, str, str]:
89
  """Draws pie charts of model's passed, failed, skipped, and error stats for AMD and NVIDIA."""
90
  # Handle case where the dataframe is empty or the model name could not be found in it
91
  if df.empty or model_name not in df.index:
@@ -135,14 +135,14 @@ def plot_model_stats(df: pd.DataFrame, model_name: str) -> tuple[plt.Figure, str
135
  plt.tight_layout()
136
  plt.subplots_adjust(top=SUBPLOT_TOP, wspace=SUBPLOT_WSPACE)
137
 
138
- amd_failed_info = prepare_textbox_content(failures_amd, 'AMD', bool(amd_filtered))
139
- nvidia_failed_info = prepare_textbox_content(failures_nvidia, 'NVIDIA', bool(nvidia_filtered))
140
 
141
  return fig, amd_failed_info, nvidia_failed_info
142
 
143
 
144
- def prepare_textbox_content(failures: dict[str, list], device: str, data_available: bool) -> str:
145
- """Extract failure information from failures object."""
146
  # Catch the case where there is no data
147
  if not data_available:
148
  return generate_underlined_line(f"No data for {device}")
@@ -160,21 +160,43 @@ def prepare_textbox_content(failures: dict[str, list], device: str, data_availab
160
  ""
161
  ]
162
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  # Add single-gpu failures
164
  if single_failures:
165
  info_lines.append(generate_underlined_line("Single GPU failures:"))
166
  for test in single_failures:
167
- name = test.get("line", "::*could not find name*")
168
- name = name.split("::")[-1]
169
- info_lines.append(name)
170
  info_lines.append("\n")
171
 
172
  # Add multi-gpu failures
173
  if multi_failures:
174
  info_lines.append(generate_underlined_line("Multi GPU failures:"))
175
  for test in multi_failures:
176
- name = test.get("line", "::*could not find name*")
177
- name = name.split("::")[-1]
178
- info_lines.append(name)
179
 
180
  return "\n".join(info_lines)
 
1
  import matplotlib.pyplot as plt
2
  import pandas as pd
3
  from utils import generate_underlined_line
4
+ from data import extract_model_data, find_failure_first_seen
5
 
6
  # Figure dimensions
7
  FIGURE_WIDTH_DUAL = 18
 
85
  pad=DEVICE_TITLE_PAD, color=TITLE_COLOR, fontfamily='monospace')
86
 
87
 
88
+ def plot_model_stats(df: pd.DataFrame, model_name: str, historical_df: pd.DataFrame = None) -> tuple[plt.Figure, str, str]:
89
  """Draws pie charts of model's passed, failed, skipped, and error stats for AMD and NVIDIA."""
90
  # Handle case where the dataframe is empty or the model name could not be found in it
91
  if df.empty or model_name not in df.index:
 
135
  plt.tight_layout()
136
  plt.subplots_adjust(top=SUBPLOT_TOP, wspace=SUBPLOT_WSPACE)
137
 
138
+ amd_failed_info = prepare_textbox_content(failures_amd, 'AMD', bool(amd_filtered), model_name, historical_df)
139
+ nvidia_failed_info = prepare_textbox_content(failures_nvidia, 'NVIDIA', bool(nvidia_filtered), model_name, historical_df)
140
 
141
  return fig, amd_failed_info, nvidia_failed_info
142
 
143
 
144
+ def prepare_textbox_content(failures: dict[str, list], device: str, data_available: bool, model_name: str = None, historical_df: pd.DataFrame = None) -> str:
145
+ """Extract failure information from failures object with first seen dates."""
146
  # Catch the case where there is no data
147
  if not data_available:
148
  return generate_underlined_line(f"No data for {device}")
 
160
  ""
161
  ]
162
 
163
+ # Helper function to format failure line with first seen date
164
def format_failure_line(test: dict, gpu_type: str) -> str:
    """Format one failure entry, appending its first-seen date when known.

    Nested helper: reads ``historical_df``, ``model_name`` and ``device`` from
    the enclosing function's scope.

    Args:
        test: Failure entry; its ``"line"`` value is the full test identifier.
        gpu_type: Key of the failures mapping the entry came from
            (``"single"`` or ``"multi"``).

    Returns:
        The last ``::``-separated component of the test identifier, followed by
        `` (First seen: MM-DD-YYYY)`` when a first-seen date was found. If that
        date is not a ``YYYY-MM-DD`` string it is shown unformatted.
    """
    from datetime import datetime

    full_name = test.get("line", "::*could not find name*")
    short_name = full_name.split("::")[-1]

    # Without historical data there is nothing to look up.
    if historical_df is None or model_name is None or historical_df.empty:
        return short_name

    first_seen = find_failure_first_seen(
        historical_df,
        model_name,
        full_name,
        device.lower(),
        gpu_type,
    )
    if not first_seen:
        return short_name

    # Re-format the date as MM-DD-YYYY; keep the raw value when it is not a
    # "YYYY-MM-DD" string (ValueError) or not a string at all (TypeError).
    try:
        formatted_date = datetime.strptime(first_seen, "%Y-%m-%d").strftime("%m-%d-%Y")
    except (TypeError, ValueError):
        formatted_date = first_seen
    return f"{short_name} (First seen: {formatted_date})"
188
+
189
  # Add single-gpu failures
190
  if single_failures:
191
  info_lines.append(generate_underlined_line("Single GPU failures:"))
192
  for test in single_failures:
193
+ info_lines.append(format_failure_line(test, "single"))
 
 
194
  info_lines.append("\n")
195
 
196
  # Add multi-gpu failures
197
  if multi_failures:
198
  info_lines.append(generate_underlined_line("Multi GPU failures:"))
199
  for test in multi_failures:
200
+ info_lines.append(format_failure_line(test, "multi"))
 
 
201
 
202
  return "\n".join(info_lines)