| | from __future__ import annotations
|
| |
|
| | from typing import List, Optional, Sequence, Tuple
|
| |
|
| | from matplotlib.figure import Figure
|
| | import numpy as np
|
| | import pandas as pd
|
| |
|
| | from core.linear_regression import run_linear_regression as _run_linear_regression
|
| |
|
| |
|
| | def _select_working_dataframe(
|
| | df: Optional[pd.DataFrame],
|
| | filtered_df: Optional[pd.DataFrame],
|
| | ) -> pd.DataFrame:
|
| | """
|
| | Use the filtered dataframe if it is non-empty; otherwise fall back to the
|
| | original dataframe. This mirrors the behaviour used in other tabs.
|
| | """
|
| | if df is None:
|
| | raise ValueError("No dataset loaded.")
|
| |
|
| | if filtered_df is not None and not filtered_df.empty:
|
| | return filtered_df
|
| |
|
| | if df.empty:
|
| | raise ValueError("The dataset is empty.")
|
| |
|
| | return df
|
| |
|
| |
|
| | def _parse_confidence_level(text: str) -> float:
|
| | """
|
| | Parse a confidence level like '0.95' into an alpha value for statsmodels.
|
| |
|
| | Returns
|
| | -------
|
| | alpha : float
|
| | Significance level (e.g. 0.05 for a 95% confidence level).
|
| | """
|
| | s = str(text).strip()
|
| | if not s:
|
| | raise ValueError("Confidence level is required (e.g. 0.95).")
|
| | try:
|
| | level = float(s)
|
| | except ValueError as exc:
|
| | raise ValueError("Confidence level must be a numeric value between 0 and 1.") from exc
|
| |
|
| | if not (0 < level < 1):
|
| | raise ValueError("Confidence level must be between 0 and 1 (e.g. 0.95).")
|
| |
|
| |
|
| | return 1.0 - level
|
| |
|
| |
|
| | def _parse_range(text: str) -> Optional[np.ndarray]:
|
| | """
|
| | Parse a range string like '0, 10' into a numpy array suitable for predictions.
|
| |
|
| | Returns
|
| | -------
|
| | np.ndarray or None
|
| | If the string is empty or only whitespace, returns None.
|
| | Otherwise returns a 1-D array of 100 evenly spaced values between
|
| | the parsed minimum and maximum.
|
| | """
|
| | s = str(text).strip()
|
| | if not s:
|
| | return None
|
| |
|
| | parts = s.split(",")
|
| | if len(parts) != 2:
|
| | raise ValueError("Range must have the form 'min, max'.")
|
| |
|
| | try:
|
| | lo = float(parts[0].strip())
|
| | hi = float(parts[1].strip())
|
| | except ValueError as exc:
|
| | raise ValueError("Range values must be numeric (e.g. '0, 10').") from exc
|
| |
|
| | if lo >= hi:
|
| | raise ValueError("Range minimum must be strictly less than the maximum.")
|
| |
|
| | return np.linspace(lo, hi, 100)
|
| |
|
| |
|
def run_linear_regression(
    *,
    df: Optional[pd.DataFrame],
    filtered_df: Optional[pd.DataFrame],
    formula_check: bool,
    formula_text: str,
    formula_latex: str,
    dependent_var: Optional[str],
    independent_vars: List[str],
    alpha_input: str,
    intercept: bool,
    graph_check: bool,
    graph_type: str,
    show_ci: bool,
    show_pi: bool,
    fit_to_obs: bool,
    x_range_text: str,
    round_digits: int = 4,
) -> Tuple[str, pd.DataFrame, Optional[Figure]]:
    """
    Controller entry point for the Linear Regression tab.

    Validates and parses the raw UI inputs, delegates the computation to
    the stats-layer ``_run_linear_regression``, and returns

        (summary_html, params_df_rounded, figure)

    where ``params_df_rounded`` is the stats layer's parameter table
    rounded to ``round_digits`` decimals. The summary and figure are
    passed through unchanged.

    Validation happens in this order, and the first failure raises
    ``ValueError``: dataset selection, dependent variable, independent
    variables, graph/variable-count compatibility, confidence level, and
    finally the x-range (only parsed for a 'Simple Regression' graph that
    is not fitted to the observations).

    Exceptions are not handled here; the tab layer is expected to catch
    them and turn them into user-facing error messages.
    """
    data = _select_working_dataframe(df, filtered_df)

    if dependent_var is None or dependent_var == "":
        raise ValueError("Please select a dependent variable.")
    if not independent_vars:
        raise ValueError("Please select at least one independent variable.")

    # Both the predictor-count check and the x-range parsing only matter
    # when a 'Simple Regression' graph was actually requested.
    simple_graph_requested = graph_check and graph_type == "Simple Regression"

    if simple_graph_requested and len(independent_vars) != 1:
        raise ValueError(
            "The 'Simple Regression' graph is only available when exactly one "
            "independent variable is selected."
        )

    alpha = _parse_confidence_level(alpha_input)

    # A custom x grid is only used when the plot is not fitted to the
    # observed data; blank range text leaves it as None.
    if simple_graph_requested and not fit_to_obs:
        x_vector = _parse_range(x_range_text)
    else:
        x_vector = None

    stats_kwargs = dict(
        df=data,
        formula_check=formula_check,
        formula_text=formula_text,
        formula_latex=formula_latex,
        dependent_var=dependent_var,
        independent_vars=independent_vars,
        alpha=alpha,
        intercept=intercept,
        create_graph=graph_check,
        graph_type=graph_type,
        show_ci=show_ci,
        show_pi=show_pi,
        fit_to_obs=fit_to_obs,
        x_vector=x_vector,
    )
    summary_html, params_df, fig = _run_linear_regression(**stats_kwargs)

    return summary_html, params_df.round(round_digits), fig
|
| |
|