timeseries_visualization / src /querychat_helpers.py
fmegahed's picture
Moving app to gradio to avoid flickering issues
22ced1b
"""
QueryChat initialization and filtered DataFrame helpers.
Provides convenience wrappers around the ``querychat`` library for
natural-language filtering of time-series DataFrames inside a Gradio
app. All functions degrade gracefully when the package or an API key
is unavailable.
"""
from __future__ import annotations
import os
from typing import List, Optional
import pandas as pd
try:
from querychat.gradio import QueryChat as _QueryChat
_QUERYCHAT_AVAILABLE = True
except ImportError: # pragma: no cover
_QUERYCHAT_AVAILABLE = False
# ---------------------------------------------------------------------------
# Availability check
# ---------------------------------------------------------------------------
def check_querychat_available() -> bool:
"""Return ``True`` when both *querychat* is installed and an API key is set.
QueryChat requires an ``OPENAI_API_KEY`` environment variable. This
helper lets callers gate UI elements behind a simple boolean.
"""
if not _QUERYCHAT_AVAILABLE:
return False
return bool(os.environ.get("OPENAI_API_KEY"))
# ---------------------------------------------------------------------------
# QueryChat factory
# ---------------------------------------------------------------------------
def create_querychat(
df: pd.DataFrame,
name: str = "dataset",
date_col: str = "date",
y_cols: Optional[List[str]] = None,
freq_label: str = "",
):
"""Create and return a QueryChat instance bound to *df*.
Parameters
----------
df:
The pandas DataFrame to expose to the chat interface.
name:
A human-readable name for the dataset (used in the description).
date_col:
Name of the date/time column.
y_cols:
Names of the value (numeric) columns. If ``None``, an empty
list is used in the description.
freq_label:
Optional frequency label (e.g. ``"Monthly"``, ``"Daily"``).
Returns
-------
QueryChat instance
The object returned by ``QueryChat()``.
Raises
------
RuntimeError
If querychat is not installed.
"""
if not _QUERYCHAT_AVAILABLE:
raise RuntimeError(
"The 'querychat' package is not installed. "
"Install it with: pip install 'querychat[gradio]'"
)
if y_cols is None:
y_cols = []
value_cols_str = ", ".join(y_cols) if y_cols else "none specified"
freq_part = f" Frequency: {freq_label}." if freq_label else ""
data_description = (
f"This dataset is named '{name}'. "
f"It contains {len(df):,} rows. "
f"The date column is '{date_col}'. "
f"Value columns: {value_cols_str}."
f"{freq_part}"
)
# Build example bullets that reference actual column names
if y_cols:
first_y = y_cols[0]
filter_example = f'- "Filter where {first_y} > median"'
else:
filter_example = '- "Filter where value > 100"'
greeting = (
f"Hi! I can help you filter and explore the **{name}** dataset. "
"Try asking me something like:\n"
'- "Show only the last 5 years"\n'
f"{filter_example}\n"
'- "Show rows from January to March"'
)
qc = _QueryChat(
data_source=df,
table_name=name.replace(" ", "_"),
client="openai/gpt-5.2-2025-12-11",
data_description=data_description,
greeting=greeting,
)
return qc
# ---------------------------------------------------------------------------
# Filtered DataFrame extraction
# ---------------------------------------------------------------------------
def get_filtered_pandas_df(qc, state_dict=None) -> pd.DataFrame:
"""Extract the currently filtered DataFrame from a QueryChat instance.
The underlying ``qc.df()`` may return a *narwhals* DataFrame rather
than a pandas one. This helper transparently converts when needed
and falls back to the original frame on any error.
Parameters
----------
qc:
A QueryChat instance previously created via :func:`create_querychat`.
state_dict:
The Gradio state dictionary from ``qc.ui()``. Required for the
Gradio variant of QueryChat.
Returns
-------
pd.DataFrame
The filtered data as a pandas DataFrame.
"""
try:
if state_dict is not None:
result = qc.df(state_dict)
else:
result = qc.df()
# narwhals (or polars) DataFrames expose .to_pandas()
if hasattr(result, "to_pandas"):
return result.to_pandas()
# narwhals also has .to_native() which may give pandas directly
if hasattr(result, "to_native"):
native = result.to_native()
if isinstance(native, pd.DataFrame):
return native
return pd.DataFrame(native)
# Already a pandas DataFrame
if isinstance(result, pd.DataFrame):
return result
# Unknown type -- attempt conversion as a last resort
return pd.DataFrame(result)
except Exception: # noqa: BLE001
# If anything goes wrong, surface the unfiltered data so the app
# can continue to function.
try:
raw = qc.df() if state_dict is None else qc.df(state_dict)
if isinstance(raw, pd.DataFrame):
return raw
except Exception: # noqa: BLE001
pass
return pd.DataFrame()