Spaces:
Running
Running
송종윤/AI Productivity팀(SR)/삼성전자
add models, add speed and time results, change scatter plot design
a452b10
| import pandas as pd | |
| from pathlib import Path | |
| from typing import Optional | |
# Module-level caches of the raw (unprocessed) dataframes.
# Each dict maps a data prefix (the sub-directory name handed to the loaders)
# to the dataframe read from disk; a value of None means "not loaded yet".
_category_df_cache: dict[str, Optional[pd.DataFrame]] = {
    "open": None,
}
_language_df_cache: dict[str, Optional[pd.DataFrame]] = {
    "open": None,
}
def _load_category_csv(data_prefix: str = "") -> pd.DataFrame:
    """Read the tab-separated category statistics file into a dataframe.

    Args:
        data_prefix: Sub-directory of the ``data`` folder located next to this
            module in which ``stats.csv`` is looked up.

    Returns:
        pd.DataFrame: A copy of the parsed contents of
        ``data/<data_prefix>/stats.csv`` (decoded as UTF-8).
    """
    module_dir = Path(__file__).parent
    csv_path = module_dir / "data" / data_prefix / "stats.csv"
    frame = pd.read_csv(str(csv_path), encoding='utf-8', delimiter="\t")
    return frame.copy()
def _load_language_csv(data_prefix: str = "open/") -> pd.DataFrame:
    """Read the tab-separated per-language statistics file into a dataframe.

    Args:
        data_prefix: Sub-directory of the ``data`` folder located next to this
            module in which ``stats_lang.csv`` is looked up.

    Returns:
        pd.DataFrame: A copy of the parsed contents of
        ``data/<data_prefix>/stats_lang.csv`` (decoded as UTF-8).
    """
    module_dir = Path(__file__).parent
    csv_path = module_dir / "data" / data_prefix / "stats_lang.csv"
    frame = pd.read_csv(str(csv_path), encoding='utf-8', delimiter="\t")
    return frame.copy()
def get_category_dataframe(processed: bool = True, data_prefix: str = "open/") -> pd.DataFrame:
    """
    Get the category dataframe.

    The raw CSV is read once per ``data_prefix`` and kept in the module-level
    cache; every call works on a copy, so callers cannot mutate the cached
    data.

    Args:
        processed: If True, returns processed dataframe (for vis_utils.py compatibility):
                   missing expected columns are added with a default value,
                   the columns named in ``constants.NUMERIC_COLS_CATEGORY``
                   are coerced to numbers and rounded, and remaining NaN
                   values are replaced by empty strings.
                   If False, returns raw dataframe sorted by Overall (for data_utils.py compatibility)
        data_prefix: Sub-directory of the data folder to load from; also used
                     as the cache key.

    Returns:
        pd.DataFrame: The category dataframe
    """
    # The cache dict is only mutated, never rebound, so no `global` is needed.
    cached = _category_df_cache.get(data_prefix)
    if cached is None:
        cached = _load_category_csv(data_prefix)
        _category_df_cache[data_prefix] = cached
    df = cached.copy()

    if not processed:
        # Apply data_utils.py processing
        return df.sort_values("Overall", ascending=False)

    # Apply vis_utils.py processing
    required_cols = [
        'Model Name', 'Link', "Group", "Overall", "Med. Len.", "Med. Resp. Len.",
        "Time to First Answer Token", "End-to-End Response Time", "Speed",
        "Parameter Size (B)", "Type", "Model Type", "Think",
        'Content Generation', 'Editing', 'Data Analysis',
        'Reasoning', 'Hallucination', 'Safety', 'Repetition',
        'Summarization', 'Translation', 'Multi-Turn',
    ]
    # Defaults for missing columns that must not fall back to 0.
    # "Think" has to be listed here: it is part of required_cols, so the
    # generic 0 default used to be applied first and the intended "Off"
    # fallback could never take effect.
    non_zero_defaults = {"Link": "", "Group": "", "Think": "Off"}
    for col in required_cols:
        if col not in df.columns:
            df[col] = non_zero_defaults.get(col, 0)

    # Kept as a function-level import, exactly where the original code had it.
    from constants import NUMERIC_COLS_CATEGORY, NUMERIC_INT_COLS_CATEGORY
    for col in NUMERIC_COLS_CATEGORY:
        if col in df.columns:
            # Integer-like columns are rounded to whole numbers, the rest to 3 decimals.
            decimals = 0 if col in NUMERIC_INT_COLS_CATEGORY else 3
            df[col] = pd.to_numeric(df[col], errors='coerce').round(decimals)
        else:
            df[col] = 0

    # Unparseable / missing values become empty strings.
    return df.fillna('')
def get_language_dataframe(processed: bool = True, data_prefix: str = "open/") -> pd.DataFrame:
    """
    Get the language dataframe.

    The raw CSV is read once per ``data_prefix`` and kept in the module-level
    cache; every call works on a copy, so callers cannot mutate the cached
    data.

    Args:
        processed: If True, returns processed dataframe (for vis_utils.py compatibility):
                   missing expected columns are added with a default value,
                   the columns named in ``constants.NUMERIC_COLS_LANGUAGE``
                   are coerced to numbers and rounded, and remaining NaN
                   values are replaced by empty strings.
                   If False, returns raw dataframe sorted by Overall (for data_utils.py compatibility)
        data_prefix: Sub-directory of the data folder to load from; also used
                     as the cache key.

    Returns:
        pd.DataFrame: The language dataframe
    """
    # The cache dict is only mutated, never rebound, so no `global` is needed.
    cached = _language_df_cache.get(data_prefix)
    if cached is None:
        cached = _load_language_csv(data_prefix)
        _language_df_cache[data_prefix] = cached
    df = cached.copy()

    if not processed:
        # Apply data_utils.py processing
        return df.sort_values("Overall", ascending=False)

    # Apply vis_utils.py processing
    language_cols = [
        'Model Name', 'Link', "Group", "Overall", "Med. Len.", "Med. Resp. Len.",
        "Time to First Answer Token", "End-to-End Response Time", "Speed",
        "Parameter Size (B)", "Type", "Model Type", "Think",
        'KO', 'EN', 'JA', 'ZH', 'PL', 'DE', 'PT', 'ES', 'FR', 'IT', 'RU', 'VI',
    ]
    # Defaults for missing columns that must not fall back to 0.
    # A missing "Think" column gets "Off", matching the fallback value used by
    # the category dataframe, instead of a numeric 0 in a categorical column.
    non_zero_defaults = {"Link": "", "Group": "", "Think": "Off"}
    for col in language_cols:
        if col not in df.columns:
            df[col] = non_zero_defaults.get(col, 0)

    # Kept as a function-level import, exactly where the original code had it.
    from constants import NUMERIC_COLS_LANGUAGE, NUMERIC_INT_COLS_LANGUAGE
    for col in NUMERIC_COLS_LANGUAGE:
        if col in df.columns:
            # Integer-like columns are rounded to whole numbers, the rest to 3 decimals.
            decimals = 0 if col in NUMERIC_INT_COLS_LANGUAGE else 3
            df[col] = pd.to_numeric(df[col], errors='coerce').round(decimals)
        else:
            df[col] = 0

    # Unparseable / missing values become empty strings.
    return df.fillna('')
def clear_cache():
    """Drop every cached dataframe so the next access reloads from disk.

    Both module-level caches are rebound to fresh dictionaries holding only
    the initial ``"open"`` placeholder entry.
    """
    global _category_df_cache, _language_df_cache
    _category_df_cache, _language_df_cache = {"open": None}, {"open": None}