fix
Browse files
data.py
CHANGED
|
@@ -433,43 +433,34 @@ def find_failure_first_seen(historical_df: pd.DataFrame, model_name: str, test_n
|
|
| 433 |
|
| 434 |
def find_new_regressions(current_df: pd.DataFrame, historical_df: pd.DataFrame) -> list[dict]:
|
| 435 |
"""
|
| 436 |
-
Compare
|
| 437 |
|
| 438 |
-
|
| 439 |
-
-
|
| 440 |
-
-
|
| 441 |
-
- device: 'amd' or 'nvidia'
|
| 442 |
-
- gpu_type: 'single' or 'multi'
|
| 443 |
"""
|
| 444 |
if current_df.empty or historical_df.empty:
|
| 445 |
return []
|
| 446 |
|
| 447 |
new_regressions = []
|
| 448 |
|
| 449 |
-
# Get the
|
| 450 |
available_dates = sorted(historical_df['date'].unique(), reverse=True)
|
| 451 |
-
if len(available_dates) <
|
| 452 |
-
#
|
| 453 |
return []
|
| 454 |
|
| 455 |
-
|
| 456 |
-
yesterday_date = available_dates[1]
|
| 457 |
-
|
| 458 |
-
# Get data for both dates
|
| 459 |
-
today_data = historical_df[historical_df['date'] == today_date]
|
| 460 |
yesterday_data = historical_df[historical_df['date'] == yesterday_date]
|
| 461 |
|
| 462 |
-
# For each model, compare
|
| 463 |
for model_name in current_df.index:
|
| 464 |
model_name_lower = model_name.lower()
|
| 465 |
|
| 466 |
-
# Get
|
| 467 |
-
|
| 468 |
-
if today_row.empty:
|
| 469 |
-
continue
|
| 470 |
-
today_row = today_row.iloc[0]
|
| 471 |
|
| 472 |
-
# Get
|
| 473 |
yesterday_row = yesterday_data[yesterday_data.index == model_name_lower]
|
| 474 |
yesterday_failures_amd = {}
|
| 475 |
yesterday_failures_nvidia = {}
|
|
@@ -491,33 +482,33 @@ def find_new_regressions(current_df: pd.DataFrame, historical_df: pd.DataFrame)
|
|
| 491 |
except:
|
| 492 |
yesterday_failures_nvidia = {}
|
| 493 |
|
| 494 |
-
# Get
|
| 495 |
-
|
| 496 |
-
|
| 497 |
|
| 498 |
# Handle string/dict conversion
|
| 499 |
-
if isinstance(
|
| 500 |
try:
|
| 501 |
-
|
| 502 |
except:
|
| 503 |
-
|
| 504 |
-
if isinstance(
|
| 505 |
try:
|
| 506 |
-
|
| 507 |
except:
|
| 508 |
-
|
| 509 |
|
| 510 |
-
# Check AMD failures
|
| 511 |
for gpu_type in ['single', 'multi']:
|
| 512 |
-
|
| 513 |
yesterday_tests = yesterday_failures_amd.get(gpu_type, [])
|
| 514 |
|
| 515 |
# Get test names
|
| 516 |
-
|
| 517 |
yesterday_test_names = {test.get('line', '') for test in yesterday_tests}
|
| 518 |
|
| 519 |
-
# Find
|
| 520 |
-
new_tests =
|
| 521 |
for test_name in new_tests:
|
| 522 |
if test_name: # Skip empty names
|
| 523 |
new_regressions.append({
|
|
@@ -528,17 +519,17 @@ def find_new_regressions(current_df: pd.DataFrame, historical_df: pd.DataFrame)
|
|
| 528 |
'gpu_type': gpu_type
|
| 529 |
})
|
| 530 |
|
| 531 |
-
# Check NVIDIA failures
|
| 532 |
for gpu_type in ['single', 'multi']:
|
| 533 |
-
|
| 534 |
yesterday_tests = yesterday_failures_nvidia.get(gpu_type, [])
|
| 535 |
|
| 536 |
# Get test names
|
| 537 |
-
|
| 538 |
yesterday_test_names = {test.get('line', '') for test in yesterday_tests}
|
| 539 |
|
| 540 |
-
# Find
|
| 541 |
-
new_tests =
|
| 542 |
for test_name in new_tests:
|
| 543 |
if test_name: # Skip empty names
|
| 544 |
new_regressions.append({
|
|
|
|
| 433 |
|
| 434 |
def find_new_regressions(current_df: pd.DataFrame, historical_df: pd.DataFrame) -> list[dict]:
|
| 435 |
"""
|
| 436 |
+
Compare CURRENT failures against PREVIOUS day's failures to find NEW regressions.
|
| 437 |
|
| 438 |
+
A regression is a test that:
|
| 439 |
+
- Is failing in the CURRENT/LATEST run (current_df)
|
| 440 |
+
- Was NOT failing in the PREVIOUS run (yesterday in historical_df)
|
|
|
|
|
|
|
| 441 |
"""
|
| 442 |
if current_df.empty or historical_df.empty:
|
| 443 |
return []
|
| 444 |
|
| 445 |
new_regressions = []
|
| 446 |
|
| 447 |
+
# Get the most recent date from historical data (this is "yesterday")
|
| 448 |
available_dates = sorted(historical_df['date'].unique(), reverse=True)
|
| 449 |
+
if len(available_dates) < 1:
|
| 450 |
+
# No history to compare against
|
| 451 |
return []
|
| 452 |
|
| 453 |
+
yesterday_date = available_dates[0]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 454 |
yesterday_data = historical_df[historical_df['date'] == yesterday_date]
|
| 455 |
|
| 456 |
+
# For each model in current data, compare against yesterday
|
| 457 |
for model_name in current_df.index:
|
| 458 |
model_name_lower = model_name.lower()
|
| 459 |
|
| 460 |
+
# Get CURRENT failures from current_df
|
| 461 |
+
current_row = current_df.loc[model_name]
|
|
|
|
|
|
|
|
|
|
| 462 |
|
| 463 |
+
# Get YESTERDAY's failures from historical_df
|
| 464 |
yesterday_row = yesterday_data[yesterday_data.index == model_name_lower]
|
| 465 |
yesterday_failures_amd = {}
|
| 466 |
yesterday_failures_nvidia = {}
|
|
|
|
| 482 |
except:
|
| 483 |
yesterday_failures_nvidia = {}
|
| 484 |
|
| 485 |
+
# Get CURRENT failures
|
| 486 |
+
current_failures_amd = current_row.get('failures_amd', {})
|
| 487 |
+
current_failures_nvidia = current_row.get('failures_nvidia', {})
|
| 488 |
|
| 489 |
# Handle string/dict conversion
|
| 490 |
+
if isinstance(current_failures_amd, str):
|
| 491 |
try:
|
| 492 |
+
current_failures_amd = json.loads(current_failures_amd)
|
| 493 |
except:
|
| 494 |
+
current_failures_amd = {}
|
| 495 |
+
if isinstance(current_failures_nvidia, str):
|
| 496 |
try:
|
| 497 |
+
current_failures_nvidia = json.loads(current_failures_nvidia)
|
| 498 |
except:
|
| 499 |
+
current_failures_nvidia = {}
|
| 500 |
|
| 501 |
+
# Check AMD failures - find tests failing NOW but NOT yesterday
|
| 502 |
for gpu_type in ['single', 'multi']:
|
| 503 |
+
current_tests = current_failures_amd.get(gpu_type, [])
|
| 504 |
yesterday_tests = yesterday_failures_amd.get(gpu_type, [])
|
| 505 |
|
| 506 |
# Get test names
|
| 507 |
+
current_test_names = {test.get('line', '') for test in current_tests}
|
| 508 |
yesterday_test_names = {test.get('line', '') for test in yesterday_tests}
|
| 509 |
|
| 510 |
+
# Find NEW failures: failing NOW but NOT yesterday
|
| 511 |
+
new_tests = current_test_names - yesterday_test_names
|
| 512 |
for test_name in new_tests:
|
| 513 |
if test_name: # Skip empty names
|
| 514 |
new_regressions.append({
|
|
|
|
| 519 |
'gpu_type': gpu_type
|
| 520 |
})
|
| 521 |
|
| 522 |
+
# Check NVIDIA failures - find tests failing NOW but NOT yesterday
|
| 523 |
for gpu_type in ['single', 'multi']:
|
| 524 |
+
current_tests = current_failures_nvidia.get(gpu_type, [])
|
| 525 |
yesterday_tests = yesterday_failures_nvidia.get(gpu_type, [])
|
| 526 |
|
| 527 |
# Get test names
|
| 528 |
+
current_test_names = {test.get('line', '') for test in current_tests}
|
| 529 |
yesterday_test_names = {test.get('line', '') for test in yesterday_tests}
|
| 530 |
|
| 531 |
+
# Find NEW failures: failing NOW but NOT yesterday
|
| 532 |
+
new_tests = current_test_names - yesterday_test_names
|
| 533 |
for test_name in new_tests:
|
| 534 |
if test_name: # Skip empty names
|
| 535 |
new_regressions.append({
|