Tcid

Running

App Files Files Community

manueldeprada HF Staff committed on Sep 13

Commit

be26939

verified ·

1 Parent(s): d87fc8a

Upload folder using huggingface_hub

Browse files

Files changed (4) hide show

app.py +185 -33
data.py +130 -2
styles.css +104 -0
time_series.py +251 -0

app.py CHANGED Viewed

@@ -7,6 +7,7 @@ from data import CIResults
 from utils import logger
 from summary_page import create_summary_page
 from model_page import plot_model_stats
 # Configure matplotlib to prevent memory warnings and set dark background
@@ -91,7 +92,47 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css()) as demo:
             description_text = get_description_text()
             description_display = gr.Markdown(description_text, elem_classes=["sidebar-description"])
-            # Summary button at the top
             summary_button = gr.Button(
                 "summary\n📊",
                 variant="primary",
@@ -134,45 +175,64 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css()) as demo:
         # Main content area
         with gr.Column(scale=4, elem_classes=["main-content"]):
-            # Summary display (default view)
-            summary_display = gr.Plot(
-                value=create_summary_page(Ci_results.df, Ci_results.available_models),
-                label="",
-                format="png",
-                elem_classes=["plot-container"],
-                visible=True
-            )
-            # Detailed view components (hidden by default)
-            with gr.Column(visible=False, elem_classes=["detail-view"]) as detail_view:
-                # Create the plot output
-                plot_output = gr.Plot(
                     label="",
                     format="png",
                     elem_classes=["plot-container"]
                 )
-                # Create two separate failed tests displays in a row layout
-                with gr.Row():
-                    with gr.Column(scale=1):
-                        amd_failed_tests_output = gr.Textbox(
-                            value="",
-                            lines=8,
-                            max_lines=8,
-                            interactive=False,
-                            container=False,
-                            elem_classes=["failed-tests"]
-                        )
-                    with gr.Column(scale=1):
-                        nvidia_failed_tests_output = gr.Textbox(
-                            value="",
-                            lines=8,
-                            max_lines=8,
-                            interactive=False,
-                            container=False,
-                            elem_classes=["failed-tests"]
-                        )
     # Set up click handlers for model buttons
     for i, btn in enumerate(model_buttons):
@@ -293,6 +353,98 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css()) as demo:
             return "🔗 **CI Jobs:** *Error loading links*\n\n❓ **[FAQ](README.md)**"
     # Auto-update CI links when the interface loads
     demo.load(
         fn=get_ci_links,

 from utils import logger
 from summary_page import create_summary_page
 from model_page import plot_model_stats
+from time_series import create_time_series_summary, create_model_time_series
 # Configure matplotlib to prevent memory warnings and set dark background
             description_text = get_description_text()
             description_display = gr.Markdown(description_text, elem_classes=["sidebar-description"])
+            # View toggle buttons
+            with gr.Row(elem_classes=["view-toggle-row"]):
+                current_view_button = gr.Button(
+                    "current\n📊",
+                    variant="primary",
+                    size="lg",
+                    elem_classes=["view-toggle-button", "view-toggle-active"]
+                )
+                historical_view_button = gr.Button(
+                    "history\n📈",
+                    variant="secondary",
+                    size="lg",
+                    elem_classes=["view-toggle-button"]
+                )
+            # Date selection (initially hidden)
+            with gr.Column(visible=False, elem_classes=["date-selection"]) as date_selection:
+                gr.Markdown("**📅 Date Range Selection**", elem_classes=["date-header"])
+                with gr.Row():
+                    start_date = gr.Dropdown(
+                        choices=Ci_results.available_dates,
+                        value=Ci_results.available_dates[0] if Ci_results.available_dates else None,
+                        label="Start Date",
+                        elem_classes=["date-dropdown"]
+                    )
+                    end_date = gr.Dropdown(
+                        choices=Ci_results.available_dates,
+                        value=Ci_results.available_dates[0] if Ci_results.available_dates else None,
+                        label="End Date",
+                        elem_classes=["date-dropdown"]
+                    )
+                load_historical_button = gr.Button(
+                    "Load Historical Data",
+                    variant="primary",
+                    size="sm",
+                    elem_classes=["load-historical-button"]
+                )
+            # Summary button (for current view)
             summary_button = gr.Button(
                 "summary\n📊",
                 variant="primary",
         # Main content area
         with gr.Column(scale=4, elem_classes=["main-content"]):
+            # Current view components
+            with gr.Column(visible=True, elem_classes=["current-view"]) as current_view:
+                # Summary display (default view)
+                summary_display = gr.Plot(
+                    value=create_summary_page(Ci_results.df, Ci_results.available_models),
+                    label="",
+                    format="png",
+                    elem_classes=["plot-container"],
+                    visible=True
+                )
+                # Detailed view components (hidden by default)
+                with gr.Column(visible=False, elem_classes=["detail-view"]) as detail_view:
+                    # Create the plot output
+                    plot_output = gr.Plot(
+                        label="",
+                        format="png",
+                        elem_classes=["plot-container"]
+                    )
+                    # Create two separate failed tests displays in a row layout
+                    with gr.Row():
+                        with gr.Column(scale=1):
+                            amd_failed_tests_output = gr.Textbox(
+                                value="",
+                                lines=8,
+                                max_lines=8,
+                                interactive=False,
+                                container=False,
+                                elem_classes=["failed-tests"]
+                            )
+                        with gr.Column(scale=1):
+                            nvidia_failed_tests_output = gr.Textbox(
+                                value="",
+                                lines=8,
+                                max_lines=8,
+                                interactive=False,
+                                container=False,
+                                elem_classes=["failed-tests"]
+                            )
+            # Historical view components (hidden by default)
+            with gr.Column(visible=False, elem_classes=["historical-view"]) as historical_view:
+                # Time-series summary display
+                time_series_summary_display = gr.Plot(
                     label="",
                     format="png",
                     elem_classes=["plot-container"]
                 )
+                # Time-series model view (hidden by default)
+                with gr.Column(visible=False, elem_classes=["time-series-detail-view"]) as time_series_detail_view:
+                    # Create the time-series plot output
+                    time_series_plot_output = gr.Plot(
+                        label="",
+                        format="png",
+                        elem_classes=["plot-container"]
+                    )
     # Set up click handlers for model buttons
     for i, btn in enumerate(model_buttons):
             return "🔗 **CI Jobs:** *Error loading links*\n\n❓ **[FAQ](README.md)**"
+    # View toggle functionality
+    def toggle_to_current_view():
+        """Switch to current view."""
+        return [
+            gr.update(visible=True),   # current_view
+            gr.update(visible=False),  # historical_view
+            gr.update(visible=False),  # date_selection
+            gr.update(visible=True),   # summary_button
+            gr.update(variant="primary", elem_classes=["view-toggle-button", "view-toggle-active"]),  # current_view_button
+            gr.update(variant="secondary", elem_classes=["view-toggle-button"])  # historical_view_button
+        ]
+    def toggle_to_historical_view():
+        """Switch to historical view."""
+        return [
+            gr.update(visible=False),  # current_view
+            gr.update(visible=True),   # historical_view
+            gr.update(visible=True),   # date_selection
+            gr.update(visible=False),  # summary_button
+            gr.update(variant="secondary", elem_classes=["view-toggle-button"]),  # current_view_button
+            gr.update(variant="primary", elem_classes=["view-toggle-button", "view-toggle-active"])  # historical_view_button
+        ]
+    current_view_button.click(
+        fn=toggle_to_current_view,
+        outputs=[current_view, historical_view, date_selection, summary_button, current_view_button, historical_view_button]
+    )
+    historical_view_button.click(
+        fn=toggle_to_historical_view,
+        outputs=[current_view, historical_view, date_selection, summary_button, current_view_button, historical_view_button]
+    )
+    # Historical data loading functionality
+    def load_historical_data(start_date, end_date):
+        """Load and display historical data."""
+        if not start_date or not end_date:
+            return gr.update(), "Please select both start and end dates."
+        try:
+            Ci_results.load_historical_data(start_date, end_date)
+            if Ci_results.historical_df.empty:
+                return gr.update(), "No historical data found for the selected date range."
+            # Create time-series summary plot
+            time_series_plot = create_time_series_summary(Ci_results.historical_df)
+            return time_series_plot, f"Loaded historical data from {start_date} to {end_date}"
+        except Exception as e:
+            logger.error(f"Error loading historical data: {e}")
+            return gr.update(), f"Error loading historical data: {str(e)}"
+    load_historical_button.click(
+        fn=load_historical_data,
+        inputs=[start_date, end_date],
+        outputs=[time_series_summary_display, description_display]
+    )
+    # Time-series model selection functionality
+    def show_time_series_model(selected_model):
+        """Show time-series view for a specific model."""
+        if Ci_results.historical_df.empty:
+            return gr.update(), "No historical data loaded. Please load historical data first."
+        try:
+            time_series_plot = create_model_time_series(Ci_results.historical_df, selected_model)
+            return time_series_plot
+        except Exception as e:
+            logger.error(f"Error creating time-series for model {selected_model}: {e}")
+            return gr.update()
+    # Update model button handlers to work with both views
+    # Each button gets two independent click listeners. Neither checks which view
+    # is currently visible, so both run on every click.
+    # NOTE(review): an earlier "Set up click handlers for model buttons" loop is
+    # only partly visible in this diff; if it still registers plot_model_stats,
+    # the current-view listener below is a duplicate. Confirm against the full file.
+    for i, btn in enumerate(model_buttons):
+        model_name = model_choices[i]
+        # Current view handler (existing functionality)
+        # model_name is bound as a lambda default so each button keeps its own
+        # model instead of the loop's last value.
+        btn.click(
+            fn=lambda selected_model=model_name: plot_model_stats(Ci_results.df, selected_model),
+            outputs=[plot_output, amd_failed_tests_output, nvidia_failed_tests_output]
+        ).then(
+            # After plotting, hide the summary and reveal the per-model detail panel.
+            fn=lambda: [gr.update(visible=False), gr.update(visible=True)],
+            outputs=[summary_display, detail_view]
+        )
+        # Historical view handler (new functionality)
+        # This listener has a single output component (time_series_plot_output).
+        btn.click(
+            fn=lambda selected_model=model_name: show_time_series_model(selected_model),
+            outputs=[time_series_plot_output]
+        ).then(
+            # Swap the history summary plot for the per-model time-series panel.
+            fn=lambda: [gr.update(visible=False), gr.update(visible=True)],
+            outputs=[time_series_summary_display, time_series_detail_view]
+        )
     # Auto-update CI links when the interface loads
     demo.load(
         fn=get_ci_links,

data.py CHANGED Viewed

@@ -1,11 +1,12 @@
 from huggingface_hub import HfFileSystem
 import pandas as pd
 from utils import logger
-from datetime import datetime
 import threading
 import traceback
 import json
 import re
 # NOTE: if caching is an issue, try adding `use_listings_cache=False`
 fs = HfFileSystem()
@@ -92,12 +93,118 @@ def infer_latest_update_msg(date_df_amd: str, date_df_nvidia: str) -> str:
 def read_one_dataframe(json_path: str, device_label: str) -> tuple[pd.DataFrame, str]:
     df_upload_date = log_dataframe_link(json_path)
-    df = pd.read_json(json_path, orient="index", encoding_errors="ignore")
     df.index.name = "model_name"
     df[f"failed_multi_no_{device_label}"] = df["failures"].apply(lambda x: len(x["multi"]) if "multi" in x else 0)
     df[f"failed_single_no_{device_label}"] = df["failures"].apply(lambda x: len(x["single"]) if "single" in x else 0)
     return df, df_upload_date
 def get_distant_data() -> tuple[pd.DataFrame, str]:
     # Retrieve AMD dataframe
     amd_src = "hf://datasets/optimum-amd/transformers_daily_ci/**/runs/**/ci_results_run_models_gpu/model_results.json"
@@ -184,6 +291,8 @@ class CIResults:
         self.df = pd.DataFrame()
         self.available_models = []
         self.latest_update_msg = ""
     def load_data(self) -> None:
         """Load data from the data source."""
@@ -203,6 +312,15 @@ class CIResults:
             logger.error("\n".join(error_msg))
             new_df, latest_update_msg = get_sample_data()
             self.latest_update_msg = latest_update_msg
         # Update attributes
         self.df = new_df
         self.available_models = new_df.index.tolist()
@@ -223,6 +341,16 @@ class CIResults:
                 msg[model][col] = value
         logger.info(json.dumps(msg, indent=4))
     def schedule_data_reload(self):
         """Schedule the next data reload."""
         def reload_data():

 from huggingface_hub import HfFileSystem
 import pandas as pd
 from utils import logger
+from datetime import datetime, timedelta
 import threading
 import traceback
 import json
 import re
+from typing import List, Tuple, Optional
 # NOTE: if caching is an issue, try adding `use_listings_cache=False`
 fs = HfFileSystem()
 def read_one_dataframe(json_path: str, device_label: str) -> tuple[pd.DataFrame, str]:
+    """Load one results JSON into a DataFrame and add per-device failure counts.
+
+    Args:
+        json_path: Location of the JSON document, passed to `pd.read_json`.
+        device_label: Suffix used in the names of the two added count columns.
+
+    Returns:
+        The DataFrame (index named "model_name") and the value returned by
+        `log_dataframe_link(json_path)`.
+    """
     df_upload_date = log_dataframe_link(json_path)
+    # orient="index": the top-level JSON keys become the row index.
+    df = pd.read_json(json_path, orient="index")
     df.index.name = "model_name"
+    # Count the entries under "multi" / "single" in each row's "failures" value; a missing key counts as 0.
     df[f"failed_multi_no_{device_label}"] = df["failures"].apply(lambda x: len(x["multi"]) if "multi" in x else 0)
     df[f"failed_single_no_{device_label}"] = df["failures"].apply(lambda x: len(x["single"]) if "single" in x else 0)
     return df, df_upload_date
+def get_available_dates() -> List[str]:
+    """Get list of available dates from both AMD and NVIDIA datasets."""
+    try:
+        # Get AMD dates
+        amd_src = "hf://datasets/optimum-amd/transformers_daily_ci/**/runs/**/ci_results_run_models_gpu/model_results.json"
+        files_amd = sorted(fs.glob(amd_src, refresh=True), reverse=True)
+        # Get NVIDIA dates
+        nvidia_src = "hf://datasets/hf-internal-testing/transformers_daily_ci/*/ci_results_run_models_gpu/model_results.json"
+        files_nvidia = sorted(fs.glob(nvidia_src, refresh=True), reverse=True)
+        # Extract dates from file paths
+        amd_dates = set()
+        for file_path in files_amd:
+            pattern = r'transformers_daily_ci(.*?)/(\d{4}-\d{2}-\d{2})'
+            match = re.search(pattern, file_path)
+            if match:
+                amd_dates.add(match.group(2))
+        nvidia_dates = set()
+        for file_path in files_nvidia:
+            pattern = r'transformers_daily_ci/(\d{4}-\d{2}-\d{2})'
+            match = re.search(pattern, file_path)
+            if match:
+                nvidia_dates.add(match.group(1))
+        # Return intersection of both datasets (dates where both have data)
+        common_dates = sorted(amd_dates.intersection(nvidia_dates), reverse=True)
+        return common_dates[:30]  # Limit to last 30 days for performance
+    except Exception as e:
+        logger.error(f"Error getting available dates: {e}")
+        # Return sample dates for fallback
+        today = datetime.now()
+        return [(today - timedelta(days=i)).strftime("%Y-%m-%d") for i in range(7)]
+def get_data_for_date(target_date: str) -> tuple[pd.DataFrame, str]:
+    """Get data for a specific date."""
+    try:
+        # Construct paths for specific date
+        amd_src = f"hf://datasets/optimum-amd/transformers_daily_ci/**/runs/{target_date}/**/ci_results_run_models_gpu/model_results.json"
+        nvidia_src = f"hf://datasets/hf-internal-testing/transformers_daily_ci/{target_date}/ci_results_run_models_gpu/model_results.json"
+        # Find matching files
+        files_amd = fs.glob(amd_src, refresh=True)
+        files_nvidia = fs.glob(nvidia_src, refresh=True)
+        if not files_amd or not files_nvidia:
+            raise FileNotFoundError(f"No data found for date {target_date}")
+        # Use the first matching file for each
+        df_amd, _ = read_one_dataframe(f"hf://{files_amd[0]}", "amd")
+        df_nvidia, _ = read_one_dataframe(f"https://huggingface.co/datasets/hf-internal-testing/transformers_daily_ci/raw/main/{target_date}/ci_results_run_models_gpu/model_results.json", "nvidia")
+        # Join both dataframes
+        joined = df_amd.join(df_nvidia, rsuffix="_nvidia", lsuffix="_amd", how="outer")
+        joined = joined[KEYS_TO_KEEP]
+        joined.index = joined.index.str.replace("^models_", "", regex=True)
+        # Filter out all but important models
+        important_models_lower = [model.lower() for model in IMPORTANT_MODELS]
+        filtered_joined = joined[joined.index.str.lower().isin(important_models_lower)]
+        return filtered_joined, target_date
+    except Exception as e:
+        logger.error(f"Error getting data for date {target_date}: {e}")
+        # Fallback to sample data
+        return get_sample_data()
+def get_historical_data(start_date: str, end_date: str) -> pd.DataFrame:
+    """Get historical data for a date range."""
+    try:
+        start_dt = datetime.strptime(start_date, "%Y-%m-%d")
+        end_dt = datetime.strptime(end_date, "%Y-%m-%d")
+        historical_data = []
+        current_dt = start_dt
+        while current_dt <= end_dt:
+            date_str = current_dt.strftime("%Y-%m-%d")
+            try:
+                df, _ = get_data_for_date(date_str)
+                df['date'] = date_str
+                historical_data.append(df)
+                logger.info(f"Loaded data for {date_str}")
+            except Exception as e:
+                logger.warning(f"Could not load data for {date_str}: {e}")
+            current_dt += timedelta(days=1)
+        if not historical_data:
+            raise ValueError("No historical data found for the specified range")
+        # Combine all dataframes
+        combined_df = pd.concat(historical_data, ignore_index=False)
+        return combined_df
+    except Exception as e:
+        logger.error(f"Error getting historical data: {e}")
+        # Return empty dataframe with proper structure
+        return pd.DataFrame()
 def get_distant_data() -> tuple[pd.DataFrame, str]:
     # Retrieve AMD dataframe
     amd_src = "hf://datasets/optimum-amd/transformers_daily_ci/**/runs/**/ci_results_run_models_gpu/model_results.json"
         self.df = pd.DataFrame()
         self.available_models = []
         self.latest_update_msg = ""
+        self.available_dates = []
+        self.historical_df = pd.DataFrame()
     def load_data(self) -> None:
         """Load data from the data source."""
             logger.error("\n".join(error_msg))
             new_df, latest_update_msg = get_sample_data()
             self.latest_update_msg = latest_update_msg
+        # Load available dates
+        try:
+            self.available_dates = get_available_dates()
+            logger.info(f"Available dates: {len(self.available_dates)} dates")
+        except Exception as e:
+            logger.error(f"Error loading available dates: {e}")
+            self.available_dates = []
         # Update attributes
         self.df = new_df
         self.available_models = new_df.index.tolist()
                 msg[model][col] = value
         logger.info(json.dumps(msg, indent=4))
+    def load_historical_data(self, start_date: str, end_date: str) -> None:
+        """Load historical data for a date range."""
+        try:
+            logger.info(f"Loading historical data from {start_date} to {end_date}")
+            self.historical_df = get_historical_data(start_date, end_date)
+            logger.info(f"Historical data loaded: {len(self.historical_df)} records")
+        except Exception as e:
+            logger.error(f"Error loading historical data: {e}")
+            self.historical_df = pd.DataFrame()
     def schedule_data_reload(self):
         """Schedule the next data reload."""
         def reload_data():

styles.css CHANGED Viewed

@@ -667,3 +667,107 @@ h1, h2, h3, p, .markdown {
     100% { scroll-behavior: auto; }
 }

     100% { scroll-behavior: auto; }
 }
+/* View toggle buttons */
+.view-toggle-row {
+    display: flex !important;
+    gap: 5px !important;
+    margin-bottom: 15px !important;
+}
+.view-toggle-button {
+    flex: 1 !important;
+    background: linear-gradient(135deg, #2a2a2a, #1e1e1e) !important;
+    color: white !important;
+    border: 1px solid #333 !important;
+    border-radius: 5px !important;
+    padding: 8px 6px !important;
+    transition: all 0.3s ease !important;
+    font-weight: 600 !important;
+    font-size: 12px !important;
+    text-transform: uppercase !important;
+    letter-spacing: 0.3px !important;
+    font-family: monospace !important;
+    height: 50px !important;
+    display: flex !important;
+    flex-direction: column !important;
+    justify-content: center !important;
+    align-items: center !important;
+    line-height: 1.2 !important;
+    cursor: pointer !important;
+}
+/* Hover look for either toggle button. This selector (class + pseudo-class) is
+   more specific than the single-class .view-toggle-active rule below, and both
+   use !important, so a hovered active button shows this background. */
+.view-toggle-button:hover {
+    background: linear-gradient(135deg, #3a3a3a, #2e2e2e) !important;
+    border-color: #555 !important;
+}
+/* Highlight for the currently selected view. It has the same specificity as
+   .view-toggle-button and the same !important flags, so it only wins because
+   it comes later in the stylesheet; keep it after that rule. */
+.view-toggle-active {
+    background: linear-gradient(135deg, #4a4a4a, #3e3e3e) !important;
+    border: 2px solid #555555 !important;
+    box-shadow:
+        0 4px 15px rgba(0, 0, 0, 0.3),
+        inset 0 1px 0 rgba(255, 255, 255, 0.2) !important;
+}
+/* Date selection styling */
+.date-selection {
+    background: linear-gradient(145deg, #0f0f0f, #1a1a1a) !important;
+    border: 1px solid #333 !important;
+    border-radius: 8px !important;
+    padding: 15px !important;
+    margin-bottom: 15px !important;
+}
+.date-header {
+    margin-bottom: 10px !important;
+    text-align: center !important;
+    color: #74b9ff !important;
+    font-family: monospace !important;
+    font-size: 14px !important;
+}
+.date-dropdown {
+    background-color: #222222 !important;
+    color: white !important;
+    border: 1px solid #444444 !important;
+    border-radius: 5px !important;
+    font-family: monospace !important;
+    font-size: 12px !important;
+}
+.date-dropdown .gr-dropdown {
+    background-color: #222222 !important;
+    color: white !important;
+    border: 1px solid #444444 !important;
+}
+.load-historical-button {
+    background: linear-gradient(135deg, #2d5aa0, #1e3f73) !important;
+    color: white !important;
+    border: 1px solid #3a6bc7 !important;
+    border-radius: 5px !important;
+    padding: 8px 12px !important;
+    transition: all 0.3s ease !important;
+    font-weight: 500 !important;
+    font-size: 12px !important;
+    text-transform: uppercase !important;
+    letter-spacing: 0.1px !important;
+    font-family: monospace !important;
+    width: 100% !important;
+    margin-top: 10px !important;
+}
+.load-historical-button:hover {
+    background: linear-gradient(135deg, #3a6bc7, #2d5aa0) !important;
+    border-color: #4a7bd9 !important;
+}
+/* Historical view styling */
+.historical-view {
+    background-color: #000000 !important;
+}
+.time-series-detail-view {
+    background-color: #000000 !important;
+}

time_series.py ADDED Viewed

	@@ -0,0 +1,251 @@

+import matplotlib.pyplot as plt
+import pandas as pd
+import numpy as np
+from datetime import datetime
+from data import extract_model_data
+# Colors matching the existing theme
+COLORS = {
+    'passed': '#4CAF50',
+    'failed': '#E53E3E',
+    'skipped': '#FFD54F',
+    'error': '#8B0000'
+}
+# Figure dimensions
+FIGURE_WIDTH = 20
+FIGURE_HEIGHT = 12
+# Styling constants
+BLACK = '#000000'
+LABEL_COLOR = '#CCCCCC'
+TITLE_COLOR = '#FFFFFF'
+GRID_COLOR = '#333333'
+# Font sizes
+TITLE_FONT_SIZE = 24
+LABEL_FONT_SIZE = 14
+LEGEND_FONT_SIZE = 12
+def create_time_series_summary(historical_df: pd.DataFrame) -> plt.Figure:
+    """Create time-series visualization for overall failure rates over time."""
+    if historical_df.empty or 'date' not in historical_df.columns:
+        fig, ax = plt.subplots(figsize=(FIGURE_WIDTH, FIGURE_HEIGHT), facecolor=BLACK)
+        ax.set_facecolor(BLACK)
+        ax.text(0.5, 0.5, 'No historical data available',
+                horizontalalignment='center', verticalalignment='center',
+                transform=ax.transAxes, fontsize=20, color='#888888',
+                fontfamily='monospace', weight='normal')
+        ax.axis('off')
+        return fig
+    # Convert date column to datetime
+    historical_df['date_dt'] = pd.to_datetime(historical_df['date'])
+    historical_df = historical_df.sort_values('date_dt')
+    # Group by date and calculate overall statistics
+    daily_stats = []
+    dates = []
+    for date in historical_df['date_dt'].unique():
+        date_data = historical_df[historical_df['date_dt'] == date]
+        total_amd_passed = 0
+        total_amd_failed = 0
+        total_amd_skipped = 0
+        total_nvidia_passed = 0
+        total_nvidia_failed = 0
+        total_nvidia_skipped = 0
+        for _, row in date_data.iterrows():
+            amd_stats, nvidia_stats = extract_model_data(row)[:2]
+            total_amd_passed += amd_stats['passed']
+            total_amd_failed += amd_stats['failed']
+            total_amd_skipped += amd_stats['skipped']
+            total_nvidia_passed += nvidia_stats['passed']
+            total_nvidia_failed += nvidia_stats['failed']
+            total_nvidia_skipped += nvidia_stats['skipped']
+        # Calculate failure rates
+        amd_total = total_amd_passed + total_amd_failed
+        nvidia_total = total_nvidia_passed + total_nvidia_failed
+        amd_failure_rate = (total_amd_failed / amd_total * 100) if amd_total > 0 else 0
+        nvidia_failure_rate = (total_nvidia_failed / nvidia_total * 100) if nvidia_total > 0 else 0
+        daily_stats.append({
+            'amd_failure_rate': amd_failure_rate,
+            'nvidia_failure_rate': nvidia_failure_rate,
+            'amd_passed': total_amd_passed,
+            'amd_failed': total_amd_failed,
+            'amd_skipped': total_amd_skipped,
+            'nvidia_passed': total_nvidia_passed,
+            'nvidia_failed': total_nvidia_failed,
+            'nvidia_skipped': total_nvidia_skipped
+        })
+        dates.append(date)
+    # Create the plot
+    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(FIGURE_WIDTH, FIGURE_HEIGHT), facecolor=BLACK)
+    ax1.set_facecolor(BLACK)
+    ax2.set_facecolor(BLACK)
+    # Plot 1: Failure rates over time
+    dates_array = np.array(dates)
+    amd_rates = [stat['amd_failure_rate'] for stat in daily_stats]
+    nvidia_rates = [stat['nvidia_failure_rate'] for stat in daily_stats]
+    ax1.plot(dates_array, amd_rates, color='#FF6B6B', linewidth=3, label='AMD', marker='o', markersize=6)
+    ax1.plot(dates_array, nvidia_rates, color='#4ECDC4', linewidth=3, label='NVIDIA', marker='s', markersize=6)
+    ax1.set_title('Overall Failure Rates Over Time', fontsize=TITLE_FONT_SIZE, color=TITLE_COLOR,
+                  fontfamily='monospace', fontweight='bold', pad=20)
+    ax1.set_ylabel('Failure Rate (%)', fontsize=LABEL_FONT_SIZE, color=LABEL_COLOR, fontfamily='monospace')
+    ax1.grid(True, color=GRID_COLOR, alpha=0.3, linestyle='-', linewidth=0.5)
+    ax1.legend(fontsize=LEGEND_FONT_SIZE, loc='upper right', frameon=False,
+               labelcolor=LABEL_COLOR, prop={'family': 'monospace'})
+    # Format x-axis
+    ax1.tick_params(colors=LABEL_COLOR, labelsize=LABEL_FONT_SIZE)
+    ax1.xaxis.label.set_color(LABEL_COLOR)
+    ax1.yaxis.label.set_color(LABEL_COLOR)
+    # Plot 2: Test counts over time (stacked area chart)
+    amd_passed = [stat['amd_passed'] for stat in daily_stats]
+    amd_failed = [stat['amd_failed'] for stat in daily_stats]
+    amd_skipped = [stat['amd_skipped'] for stat in daily_stats]
+    nvidia_passed = [stat['nvidia_passed'] for stat in daily_stats]
+    nvidia_failed = [stat['nvidia_failed'] for stat in daily_stats]
+    nvidia_skipped = [stat['nvidia_skipped'] for stat in daily_stats]
+    # AMD stacked area
+    ax2.fill_between(dates_array, 0, amd_passed, color=COLORS['passed'], alpha=0.7, label='AMD Passed')
+    ax2.fill_between(dates_array, amd_passed, np.array(amd_passed) + np.array(amd_failed),
+                     color=COLORS['failed'], alpha=0.7, label='AMD Failed')
+    ax2.fill_between(dates_array, np.array(amd_passed) + np.array(amd_failed),
+                     np.array(amd_passed) + np.array(amd_failed) + np.array(amd_skipped),
+                     color=COLORS['skipped'], alpha=0.7, label='AMD Skipped')
+    # NVIDIA stacked area (offset to the right)
+    offset = 0.4  # Offset in days
+    dates_offset = dates_array + pd.Timedelta(days=offset)
+    ax2.fill_between(dates_offset, 0, nvidia_passed, color=COLORS['passed'], alpha=0.4, label='NVIDIA Passed')
+    ax2.fill_between(dates_offset, nvidia_passed, np.array(nvidia_passed) + np.array(nvidia_failed),
+                     color=COLORS['failed'], alpha=0.4, label='NVIDIA Failed')
+    ax2.fill_between(dates_offset, np.array(nvidia_passed) + np.array(nvidia_failed),
+                     np.array(nvidia_passed) + np.array(nvidia_failed) + np.array(nvidia_skipped),
+                     color=COLORS['skipped'], alpha=0.4, label='NVIDIA Skipped')
+    ax2.set_title('Test Results Over Time (Stacked)', fontsize=TITLE_FONT_SIZE, color=TITLE_COLOR,
+                  fontfamily='monospace', fontweight='bold', pad=20)
+    ax2.set_ylabel('Number of Tests', fontsize=LABEL_FONT_SIZE, color=LABEL_COLOR, fontfamily='monospace')
+    ax2.set_xlabel('Date', fontsize=LABEL_FONT_SIZE, color=LABEL_COLOR, fontfamily='monospace')
+    ax2.grid(True, color=GRID_COLOR, alpha=0.3, linestyle='-', linewidth=0.5)
+    ax2.legend(fontsize=LEGEND_FONT_SIZE, loc='upper right', frameon=False,
+               labelcolor=LABEL_COLOR, prop={'family': 'monospace'})
+    # Format x-axis
+    ax2.tick_params(colors=LABEL_COLOR, labelsize=LABEL_FONT_SIZE)
+    ax2.xaxis.label.set_color(LABEL_COLOR)
+    ax2.yaxis.label.set_color(LABEL_COLOR)
+    # Rotate x-axis labels for better readability
+    for ax in [ax1, ax2]:
+        ax.tick_params(axis='x', rotation=45)
+    plt.tight_layout()
+    return fig
+def create_model_time_series(historical_df: pd.DataFrame, model_name: str) -> plt.Figure:
+    """Create time-series visualization for a specific model."""
+    if historical_df.empty or 'date' not in historical_df.columns:
+        fig, ax = plt.subplots(figsize=(FIGURE_WIDTH, FIGURE_HEIGHT), facecolor=BLACK)
+        ax.set_facecolor(BLACK)
+        ax.text(0.5, 0.5, f'No historical data available for {model_name}',
+                horizontalalignment='center', verticalalignment='center',
+                transform=ax.transAxes, fontsize=20, color='#888888',
+                fontfamily='monospace', weight='normal')
+        ax.axis('off')
+        return fig
+    # Filter data for the specific model
+    model_data = historical_df[historical_df.index.str.lower() == model_name.lower()]
+    if model_data.empty:
+        fig, ax = plt.subplots(figsize=(FIGURE_WIDTH, FIGURE_HEIGHT), facecolor=BLACK)
+        ax.set_facecolor(BLACK)
+        ax.text(0.5, 0.5, f'No data found for model: {model_name}',
+                horizontalalignment='center', verticalalignment='center',
+                transform=ax.transAxes, fontsize=20, color='#888888',
+                fontfamily='monospace', weight='normal')
+        ax.axis('off')
+        return fig
+    # Convert date column to datetime and sort
+    model_data = model_data.copy()
+    model_data['date_dt'] = pd.to_datetime(model_data['date'])
+    model_data = model_data.sort_values('date_dt')
+    # Extract statistics for each date
+    dates = model_data['date_dt'].values
+    amd_stats_list = []
+    nvidia_stats_list = []
+    for _, row in model_data.iterrows():
+        amd_stats, nvidia_stats = extract_model_data(row)[:2]
+        amd_stats_list.append(amd_stats)
+        nvidia_stats_list.append(nvidia_stats)
+    # Create the plot
+    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(FIGURE_WIDTH, FIGURE_HEIGHT), facecolor=BLACK)
+    ax1.set_facecolor(BLACK)
+    ax2.set_facecolor(BLACK)
+    # Plot 1: AMD results over time
+    amd_passed = [stats['passed'] for stats in amd_stats_list]
+    amd_failed = [stats['failed'] for stats in amd_stats_list]
+    amd_skipped = [stats['skipped'] for stats in amd_stats_list]
+    ax1.plot(dates, amd_passed, color=COLORS['passed'], linewidth=3, label='Passed', marker='o', markersize=6)
+    ax1.plot(dates, amd_failed, color=COLORS['failed'], linewidth=3, label='Failed', marker='s', markersize=6)
+    ax1.plot(dates, amd_skipped, color=COLORS['skipped'], linewidth=3, label='Skipped', marker='^', markersize=6)
+    ax1.set_title(f'{model_name.upper()} - AMD Results Over Time', fontsize=TITLE_FONT_SIZE, color=TITLE_COLOR,
+                  fontfamily='monospace', fontweight='bold', pad=20)
+    ax1.set_ylabel('Number of Tests', fontsize=LABEL_FONT_SIZE, color=LABEL_COLOR, fontfamily='monospace')
+    ax1.grid(True, color=GRID_COLOR, alpha=0.3, linestyle='-', linewidth=0.5)
+    ax1.legend(fontsize=LEGEND_FONT_SIZE, loc='upper right', frameon=False,
+               labelcolor=LABEL_COLOR, prop={'family': 'monospace'})
+    # Plot 2: NVIDIA results over time
+    nvidia_passed = [stats['passed'] for stats in nvidia_stats_list]
+    nvidia_failed = [stats['failed'] for stats in nvidia_stats_list]
+    nvidia_skipped = [stats['skipped'] for stats in nvidia_stats_list]
+    ax2.plot(dates, nvidia_passed, color=COLORS['passed'], linewidth=3, label='Passed', marker='o', markersize=6)
+    ax2.plot(dates, nvidia_failed, color=COLORS['failed'], linewidth=3, label='Failed', marker='s', markersize=6)
+    ax2.plot(dates, nvidia_skipped, color=COLORS['skipped'], linewidth=3, label='Skipped', marker='^', markersize=6)
+    ax2.set_title(f'{model_name.upper()} - NVIDIA Results Over Time', fontsize=TITLE_FONT_SIZE, color=TITLE_COLOR,
+                  fontfamily='monospace', fontweight='bold', pad=20)
+    ax2.set_ylabel('Number of Tests', fontsize=LABEL_FONT_SIZE, color=LABEL_COLOR, fontfamily='monospace')
+    ax2.set_xlabel('Date', fontsize=LABEL_FONT_SIZE, color=LABEL_COLOR, fontfamily='monospace')
+    ax2.grid(True, color=GRID_COLOR, alpha=0.3, linestyle='-', linewidth=0.5)
+    ax2.legend(fontsize=LEGEND_FONT_SIZE, loc='upper right', frameon=False,
+               labelcolor=LABEL_COLOR, prop={'family': 'monospace'})
+    # Format axes
+    for ax in [ax1, ax2]:
+        ax.tick_params(colors=LABEL_COLOR, labelsize=LABEL_FONT_SIZE)
+        ax.xaxis.label.set_color(LABEL_COLOR)
+        ax.yaxis.label.set_color(LABEL_COLOR)
+        ax.tick_params(axis='x', rotation=45)
+    plt.tight_layout()
+    return fig