Spaces:
Sleeping
Sleeping
| # AI-Assisted Code — Academic Integrity Notice | |
| # Generated with The App Builder. ESCP coursework. | |
| # Student must be able to explain all code when asked. | |
| import json | |
| import os | |
| import shutil | |
| import time | |
| import traceback | |
| from pathlib import Path | |
| from typing import Any, Dict, List, Tuple | |
| import gradio as gr | |
| import pandas as pd | |
| import papermill as pm | |
| import plotly.graph_objects as go | |
| BASE_DIR = Path(__file__).resolve().parent | |
| NOTEBOOK_NAME = os.environ.get("NB_ANALYSIS", "2a_Python_Analysis_Constance_Gonnelle.ipynb").strip() | |
| # Input files shipped with the Space | |
| SALES_SOURCE = BASE_DIR / "books_data.csv" | |
| REVIEWS_SOURCE = BASE_DIR / "synthetic_book_reviews.csv" | |
| # Filenames expected by the notebook | |
| SALES_ALIAS = BASE_DIR / "synthetic_sales_data.csv" | |
| REVIEWS_ALIAS = BASE_DIR / "synthetic_book_reviews.csv" | |
| RUNS_DIR = BASE_DIR / "runs" | |
| ART_DIR = BASE_DIR / "artifacts" | |
| PY_FIG_DIR = ART_DIR / "py" / "figures" | |
| PY_TAB_DIR = ART_DIR / "py" / "tables" | |
| PAPERMILL_TIMEOUT = int(os.environ.get("PAPERMILL_TIMEOUT", "1800")) | |
| MAX_PREVIEW_ROWS = int(os.environ.get("MAX_FILE_PREVIEW_ROWS", "100")) | |
| MAX_LOG_CHARS = int(os.environ.get("MAX_LOG_CHARS", "8000")) | |
| def ensure_dirs() -> None: | |
| """Create all folders used by the app.""" | |
| for folder in [RUNS_DIR, ART_DIR, PY_FIG_DIR, PY_TAB_DIR]: | |
| folder.mkdir(parents=True, exist_ok=True) | |
| def stamp() -> str: | |
| """Return a timestamp for run outputs.""" | |
| return time.strftime("%Y%m%d-%H%M%S") | |
| def tail(text: str, limit: int = MAX_LOG_CHARS) -> str: | |
| """Keep logs short enough for the UI.""" | |
| return (text or "")[-limit:] | |
| def prepare_input_files() -> List[str]: | |
| """ | |
| Make sure the notebook sees the exact filenames it expects. | |
| This is like giving a spreadsheet the same tab names every time. | |
| """ | |
| messages = [] | |
| if not SALES_SOURCE.exists(): | |
| raise FileNotFoundError("books_data.csv is missing.") | |
| if not REVIEWS_SOURCE.exists(): | |
| raise FileNotFoundError("synthetic_book_reviews.csv is missing.") | |
| if SALES_SOURCE.resolve() != SALES_ALIAS.resolve(): | |
| shutil.copyfile(SALES_SOURCE, SALES_ALIAS) | |
| messages.append("Copied books_data.csv -> synthetic_sales_data.csv") | |
| else: | |
| messages.append("synthetic_book_reviews.csv already uses the expected filename") | |
| messages.append("synthetic_book_reviews.csv ready") | |
| return messages | |
| def artifacts_index() -> Dict[str, Any]: | |
| """List the files created by the notebook.""" | |
| return { | |
| "figures": sorted( | |
| p.name for p in PY_FIG_DIR.glob("*") if p.suffix.lower() in {".png", ".jpg", ".jpeg"} | |
| ), | |
| "tables": sorted( | |
| p.name for p in PY_TAB_DIR.glob("*") if p.suffix.lower() in {".csv", ".json"} | |
| ), | |
| } | |
| def load_json(path: Path) -> Dict[str, Any]: | |
| """Read JSON safely.""" | |
| with path.open(encoding="utf-8") as file: | |
| return json.load(file) | |
| def load_table(path: Path) -> pd.DataFrame: | |
| """Load a CSV or JSON artifact for preview.""" | |
| try: | |
| if path.suffix.lower() == ".json": | |
| data = load_json(path) | |
| if isinstance(data, dict): | |
| return pd.DataFrame([data]) | |
| return pd.DataFrame(data) | |
| return pd.read_csv(path, nrows=MAX_PREVIEW_ROWS) | |
| except Exception as exc: | |
| return pd.DataFrame([{"error": str(exc)}]) | |
| def run_notebook() -> str: | |
| """Execute the student's analysis notebook with papermill.""" | |
| ensure_dirs() | |
| notebook_path = BASE_DIR / NOTEBOOK_NAME | |
| if not notebook_path.exists(): | |
| return f"ERROR: Notebook not found: {NOTEBOOK_NAME}" | |
| try: | |
| prep_messages = prepare_input_files() | |
| output_path = RUNS_DIR / f"run_{stamp()}_{NOTEBOOK_NAME}" | |
| pm.execute_notebook( | |
| input_path=str(notebook_path), | |
| output_path=str(output_path), | |
| cwd=str(BASE_DIR), | |
| log_output=True, | |
| progress_bar=False, | |
| request_save_on_cell_execute=True, | |
| execution_timeout=PAPERMILL_TIMEOUT, | |
| ) | |
| idx = artifacts_index() | |
| figure_lines = [f" - {name}" for name in idx["figures"]] or [" - (none)"] | |
| table_lines = [f" - {name}" for name in idx["tables"]] or [" - (none)"] | |
| lines = [ | |
| "Notebook executed successfully.", | |
| *prep_messages, | |
| "", | |
| f"Notebook: {NOTEBOOK_NAME}", | |
| f"Run file: {output_path.name}", | |
| "", | |
| "Generated figures:", | |
| *figure_lines, | |
| "", | |
| "Generated tables:", | |
| *table_lines, | |
| ] | |
| return "\n".join(lines) | |
| except Exception as exc: | |
| return ( | |
| f"FAILED: {exc}\n\n" | |
| f"{tail(traceback.format_exc(), 3000)}" | |
| ) | |
| def load_kpis() -> Dict[str, Any]: | |
| """Load the KPI summary exported by the notebook.""" | |
| for candidate in [PY_FIG_DIR / "kpis.json", PY_TAB_DIR / "kpis.json"]: | |
| if candidate.exists(): | |
| try: | |
| return load_json(candidate) | |
| except Exception: | |
| pass | |
| return {} | |
| def render_kpis() -> str: | |
| """Build lightweight KPI cards without background images.""" | |
| kpis = load_kpis() | |
| if not kpis: | |
| return """ | |
| <div class="empty-state"> | |
| <div class="empty-icon">📊</div> | |
| <div class="empty-title">No outputs yet</div> | |
| <div class="empty-text">Click “Run notebook” to generate figures, tables, and KPI cards.</div> | |
| </div> | |
| """ | |
| cards = [] | |
| labels = { | |
| "n_titles": "Book Titles", | |
| "n_months": "Months", | |
| "total_units_sold": "Units Sold", | |
| "total_revenue": "Revenue", | |
| } | |
| for key, value in kpis.items(): | |
| nice_label = labels.get(key, key.replace("_", " ").title()) | |
| if isinstance(value, (int, float)) and abs(value) >= 1000: | |
| value = f"{value:,.0f}" | |
| cards.append( | |
| f""" | |
| <div class="kpi-card"> | |
| <div class="kpi-label">{nice_label}</div> | |
| <div class="kpi-value">{value}</div> | |
| </div> | |
| """ | |
| ) | |
| return f'<div class="kpi-grid">{"".join(cards)}</div>' | |
| def empty_chart(title: str) -> go.Figure: | |
| """Return a placeholder chart.""" | |
| fig = go.Figure() | |
| fig.update_layout( | |
| title=title, | |
| template="plotly_white", | |
| height=420, | |
| annotations=[ | |
| dict( | |
| text="Run the notebook to populate this chart", | |
| x=0.5, | |
| y=0.5, | |
| xref="paper", | |
| yref="paper", | |
| showarrow=False, | |
| ) | |
| ], | |
| ) | |
| return fig | |
| def build_sales_chart() -> go.Figure: | |
| """Create an interactive monthly trend chart from df_dashboard.csv.""" | |
| path = PY_TAB_DIR / "df_dashboard.csv" | |
| if not path.exists(): | |
| return empty_chart("Monthly Overview") | |
| df = pd.read_csv(path) | |
| date_col = next((col for col in df.columns if "month" in col.lower() or "date" in col.lower()), None) | |
| metric_cols = [col for col in df.columns if col != date_col and pd.api.types.is_numeric_dtype(df[col])] | |
| if not date_col or not metric_cols: | |
| return empty_chart("Monthly Overview") | |
| df[date_col] = pd.to_datetime(df[date_col], errors="coerce") | |
| fig = go.Figure() | |
| for column in metric_cols: | |
| fig.add_trace( | |
| go.Scatter( | |
| x=df[date_col], | |
| y=df[column], | |
| mode="lines+markers", | |
| name=column.replace("_", " ").title(), | |
| hovertemplate=f"<b>{column.replace('_', ' ').title()}</b><br>%{{x|%b %Y}}: %{{y:,.0f}}<extra></extra>", | |
| ) | |
| ) | |
| fig.update_layout(template="plotly_white", height=450, title="Monthly Overview", hovermode="x unified") | |
| return fig | |
| def build_top_sellers_chart() -> go.Figure: | |
| """Create an interactive top-sellers bar chart.""" | |
| path = PY_TAB_DIR / "top_titles_by_units_sold.csv" | |
| if not path.exists(): | |
| return empty_chart("Top Selling Titles") | |
| df = pd.read_csv(path).head(15) | |
| title_col = next((col for col in df.columns if "title" in col.lower()), df.columns[0]) | |
| value_col = next((col for col in df.columns if "unit" in col.lower() or "sold" in col.lower()), df.columns[-1]) | |
| fig = go.Figure( | |
| go.Bar( | |
| x=df[value_col], | |
| y=df[title_col], | |
| orientation="h", | |
| hovertemplate="<b>%{y}</b><br>Units: %{x:,.0f}<extra></extra>", | |
| ) | |
| ) | |
| fig.update_layout(template="plotly_white", height=max(420, len(df) * 28), title="Top Selling Titles") | |
| fig.update_yaxes(autorange="reversed") | |
| return fig | |
| def build_sentiment_chart() -> go.Figure: | |
| """Create an interactive sentiment chart.""" | |
| path = PY_TAB_DIR / "sentiment_counts_sampled.csv" | |
| if not path.exists(): | |
| return empty_chart("Sentiment Distribution") | |
| df = pd.read_csv(path) | |
| title_col = df.columns[0] | |
| sentiment_cols = [col for col in ["negative", "neutral", "positive"] if col in df.columns] | |
| if not sentiment_cols: | |
| return empty_chart("Sentiment Distribution") | |
| fig = go.Figure() | |
| for col in sentiment_cols: | |
| fig.add_trace( | |
| go.Bar( | |
| x=df[col], | |
| y=df[title_col], | |
| name=col.title(), | |
| orientation="h", | |
| hovertemplate=f"<b>{col.title()}</b>: %{{x}}<extra></extra>", | |
| ) | |
| ) | |
| fig.update_layout( | |
| template="plotly_white", | |
| barmode="stack", | |
| height=max(420, len(df) * 28), | |
| title="Sentiment Distribution by Book", | |
| ) | |
| fig.update_yaxes(autorange="reversed") | |
| return fig | |
| def refresh_gallery() -> Tuple[List[Tuple[str, str]], gr.Dropdown, pd.DataFrame]: | |
| """Refresh static figures and tables.""" | |
| figures = [(str(path), path.stem.replace("_", " ").title()) for path in sorted(PY_FIG_DIR.glob("*.png"))] | |
| idx = artifacts_index() | |
| table_choices = idx["tables"] | |
| first_table = load_table(PY_TAB_DIR / table_choices[0]) if table_choices else pd.DataFrame() | |
| return figures, gr.update(choices=table_choices, value=table_choices[0] if table_choices else None), first_table | |
| def on_table_select(choice: str) -> pd.DataFrame: | |
| """Load the selected table.""" | |
| if not choice: | |
| return pd.DataFrame([{"hint": "Select a table above."}]) | |
| path = PY_TAB_DIR / choice | |
| if not path.exists(): | |
| return pd.DataFrame([{"error": f"File not found: {choice}"}]) | |
| return load_table(path) | |
| def answer_question(user_msg: str, history: List[Dict[str, str]]): | |
| """Simple assistant that routes the user to the right chart or table.""" | |
| idx = artifacts_index() | |
| text = (user_msg or "").strip() | |
| if not text: | |
| return history, "", None, None | |
| msg = text.lower() | |
| reply = "Ask about sales trends, sentiment, top sellers, or dashboard overview." | |
| chart = None | |
| table = None | |
| if not idx["figures"] and not idx["tables"]: | |
| reply = "No artifacts exist yet. Run the notebook first in the first tab." | |
| elif any(word in msg for word in ["trend", "sales", "monthly", "forecast", "arima"]): | |
| reply = "Here is the monthly overview built from the notebook outputs." | |
| chart = build_sales_chart() | |
| elif any(word in msg for word in ["sentiment", "review", "positive", "negative"]): | |
| reply = "Here is the sentiment distribution across the sampled books." | |
| chart = build_sentiment_chart() | |
| elif any(word in msg for word in ["top", "best", "popular", "rank"]): | |
| reply = "Here is the top-sellers table exported by the notebook." | |
| path = PY_TAB_DIR / "top_titles_by_units_sold.csv" | |
| table = load_table(path) if path.exists() else pd.DataFrame([{"error": "top_titles_by_units_sold.csv not found"}]) | |
| elif any(word in msg for word in ["overview", "dashboard", "kpi", "summary"]): | |
| reply = "Here is the dashboard overview table." | |
| path = PY_TAB_DIR / "df_dashboard.csv" | |
| table = load_table(path) if path.exists() else pd.DataFrame([{"error": "df_dashboard.csv not found"}]) | |
| new_history = (history or []) + [ | |
| {"role": "user", "content": text}, | |
| {"role": "assistant", "content": reply}, | |
| ] | |
| return new_history, "", chart, table | |
| def refresh_dashboard(): | |
| """Refresh all dashboard widgets together.""" | |
| kpis = render_kpis() | |
| sales = build_sales_chart() | |
| sentiment = build_sentiment_chart() | |
| top = build_top_sellers_chart() | |
| figures, dropdown, table = refresh_gallery() | |
| return kpis, sales, sentiment, top, figures, dropdown, table | |
| ensure_dirs() | |
| with gr.Blocks(title="Book Analytics Space", css_paths=["style.css"]) as demo: | |
| gr.Markdown( | |
| "# Book Analytics Space\n" | |
| "This Gradio app runs your notebook on the two CSV datasets and then exposes the exported outputs." | |
| ) | |
| with gr.Tab("1. Run Notebook"): | |
| gr.Markdown( | |
| "Use this tab to execute the notebook on the packaged datasets. " | |
| "The app automatically maps `books_data.csv` to the filename expected by the notebook." | |
| ) | |
| run_btn = gr.Button("Run notebook", variant="primary") | |
| run_log = gr.Textbox(label="Execution log", lines=20, interactive=False) | |
| run_btn.click(run_notebook, outputs=run_log) | |
| with gr.Tab("2. Dashboard"): | |
| kpi_html = gr.HTML(value=render_kpis()) | |
| refresh_btn = gr.Button("Refresh dashboard", variant="secondary") | |
| sales_plot = gr.Plot(label="Monthly Overview") | |
| sentiment_plot = gr.Plot(label="Sentiment Distribution") | |
| top_plot = gr.Plot(label="Top Sellers") | |
| gallery = gr.Gallery(label="Static Figures", columns=2, height=420, object_fit="contain") | |
| table_dropdown = gr.Dropdown(label="Tables", choices=[], interactive=True) | |
| table_display = gr.Dataframe(label="Table Preview", interactive=False) | |
| refresh_btn.click( | |
| refresh_dashboard, | |
| outputs=[kpi_html, sales_plot, sentiment_plot, top_plot, gallery, table_dropdown, table_display], | |
| ) | |
| table_dropdown.change(on_table_select, inputs=table_dropdown, outputs=table_display) | |
| with gr.Tab("3. AI Dashboard"): | |
| gr.Markdown("Ask a simple question and the app will show the relevant chart or table.") | |
| chatbot = gr.Chatbot(type="messages", height=360) | |
| user_input = gr.Textbox( | |
| label="Ask about your data", | |
| placeholder="Example: Show me the sales trends", | |
| ) | |
| ai_chart = gr.Plot(label="Interactive Chart") | |
| ai_table = gr.Dataframe(label="Data Table", interactive=False) | |
| gr.Examples( | |
| examples=[ | |
| "Show me the sales trends", | |
| "What does the sentiment look like?", | |
| "Which titles sell the most?", | |
| "Give me a dashboard overview", | |
| ], | |
| inputs=user_input, | |
| ) | |
| user_input.submit( | |
| answer_question, | |
| inputs=[user_input, chatbot], | |
| outputs=[chatbot, user_input, ai_chart, ai_table], | |
| ) | |
| demo.launch() | |