# again / controllers / linear_regression_controller.py
# Beam2513's picture
# Upload 127 files
# 798602c verified
from __future__ import annotations
from typing import List, Optional, Sequence, Tuple
from matplotlib.figure import Figure
import numpy as np
import pandas as pd
from core.linear_regression import run_linear_regression as _run_linear_regression
def _select_working_dataframe(
    df: Optional[pd.DataFrame],
    filtered_df: Optional[pd.DataFrame],
) -> pd.DataFrame:
    """
    Pick the dataframe the regression should run on.

    The filtered dataframe wins whenever it exists and contains data;
    otherwise the original dataframe is used.

    Returns
    -------
    pd.DataFrame
        ``filtered_df`` when it is non-empty, else ``df``.

    Raises
    ------
    ValueError
        If no dataset is loaded (``df`` is None), or if the fallback
        dataset is empty.
    """
    # A missing base dataset is an error even if a filtered frame was passed.
    if df is None:
        raise ValueError("No dataset loaded.")

    filtered_is_usable = not (filtered_df is None or filtered_df.empty)
    if filtered_is_usable:
        return filtered_df

    if not df.empty:
        return df
    raise ValueError("The dataset is empty.")
def _parse_confidence_level(text: str) -> float:
    """
    Convert a confidence-level string such as '0.95' into a significance level.

    Returns
    -------
    alpha : float
        One minus the parsed confidence level (e.g. 0.05 for '0.95').

    Raises
    ------
    ValueError
        If the text is blank, is not a number, or does not lie strictly
        between 0 and 1.
    """
    cleaned = str(text).strip()
    if cleaned == "":
        raise ValueError("Confidence level is required (e.g. 0.95).")

    try:
        confidence = float(cleaned)
    except ValueError as exc:
        raise ValueError("Confidence level must be a numeric value between 0 and 1.") from exc

    # The chained comparison is False for NaN, so 'nan' is rejected as well.
    if 0 < confidence < 1:
        # Callers are handed alpha (the significance level), not the
        # confidence level itself.
        return 1.0 - confidence

    raise ValueError("Confidence level must be between 0 and 1 (e.g. 0.95).")
def _parse_range(text: str) -> Optional[np.ndarray]:
    """
    Parse a range string like '0, 10' into a numpy array suitable for predictions.

    Parameters
    ----------
    text : str
        Raw user input of the form 'min, max'. ``None`` is treated the
        same as a blank string.

    Returns
    -------
    np.ndarray or None
        None if the input is ``None``, empty, or only whitespace.
        Otherwise a 1-D array of 100 evenly spaced values from the parsed
        minimum to the parsed maximum (both endpoints included).

    Raises
    ------
    ValueError
        If the text does not contain exactly two comma-separated parts,
        if either part is not a number, if either bound is NaN or
        infinite, or if the minimum is not strictly less than the maximum.
    """
    # Without this guard str(None) would be parsed as the literal "None".
    if text is None:
        return None

    s = str(text).strip()
    if not s:
        return None

    parts = s.split(",")
    if len(parts) != 2:
        raise ValueError("Range must have the form 'min, max'.")

    try:
        lo = float(parts[0].strip())
        hi = float(parts[1].strip())
    except ValueError as exc:
        raise ValueError("Range values must be numeric (e.g. '0, 10').") from exc

    # float() accepts 'nan' and 'inf'. NaN would slip past the ordering check
    # below (every comparison with NaN is False) and an infinite bound would
    # make linspace return non-finite values, so reject both explicitly.
    if not (np.isfinite(lo) and np.isfinite(hi)):
        raise ValueError("Range values must be finite numbers (e.g. '0, 10').")

    if lo >= hi:
        raise ValueError("Range minimum must be strictly less than the maximum.")

    return np.linspace(lo, hi, 100)
def run_linear_regression(
    *,
    df: Optional[pd.DataFrame],
    filtered_df: Optional[pd.DataFrame],
    formula_check: bool,
    formula_text: str,
    formula_latex: str,
    dependent_var: Optional[str],
    independent_vars: List[str],
    alpha_input: str,
    intercept: bool,
    graph_check: bool,
    graph_type: str,
    show_ci: bool,
    show_pi: bool,
    fit_to_obs: bool,
    x_range_text: str,
    round_digits: int = 4,
) -> Tuple[str, pd.DataFrame, Optional[Figure]]:
    """
    Controller entry point for the Linear Regression tab.

    Validates and parses the raw UI inputs, delegates the computation to
    the stats layer (``core.linear_regression.run_linear_regression``) and
    rounds the resulting parameter table.

    Parameters
    ----------
    df, filtered_df
        Original and filtered datasets; the filtered one is used when it
        is non-empty, otherwise the original.
    dependent_var, independent_vars
        Selected variables; both selections are required.
    alpha_input
        Confidence level as text (e.g. '0.95'); converted to alpha before
        being handed to the stats layer.
    graph_check, graph_type, fit_to_obs, x_range_text
        The X range text is parsed only when a "Simple Regression" graph
        is requested and ``fit_to_obs`` is false; otherwise no X vector
        is passed on.
    round_digits
        Number of decimals used to round the parameter table.
    formula_check, formula_text, formula_latex, intercept, show_ci, show_pi
        Forwarded unchanged to the stats layer.

    Returns
    -------
    (summary_html, params_df_rounded, figure)

    Raises
    ------
    ValueError
        On any validation or parsing failure. Exceptions are not caught
        here; the tab layer is expected to turn them into user-facing
        messages.
    """
    data = _select_working_dataframe(df, filtered_df)

    if dependent_var is None or dependent_var == "":
        raise ValueError("Please select a dependent variable.")
    if not independent_vars:
        raise ValueError("Please select at least one independent variable.")

    # Both the variable-count check and the X-range parsing hinge on
    # whether a "Simple Regression" graph was requested.
    simple_graph_requested = graph_check and graph_type == "Simple Regression"

    if simple_graph_requested and len(independent_vars) != 1:
        raise ValueError(
            "The 'Simple Regression' graph is only available when exactly one "
            "independent variable is selected."
        )

    # The UI supplies a confidence level; the stats layer receives alpha.
    alpha = _parse_confidence_level(alpha_input)

    # A custom X grid is only relevant when the plot is not fitted to the
    # observed data.
    if simple_graph_requested and not fit_to_obs:
        x_vector = _parse_range(x_range_text)
    else:
        x_vector = None

    summary_html, coefficients, figure = _run_linear_regression(
        df=data,
        formula_check=formula_check,
        formula_text=formula_text,
        formula_latex=formula_latex,
        dependent_var=dependent_var,
        independent_vars=independent_vars,
        alpha=alpha,
        intercept=intercept,
        create_graph=graph_check,
        graph_type=graph_type,
        show_ci=show_ci,
        show_pi=show_pi,
        fit_to_obs=fit_to_obs,
        x_vector=x_vector,
    )

    # Rounding is applied here rather than in the stats layer.
    return summary_html, coefficients.round(round_digits), figure