Spaces:

Constance2222
/

123

Sleeping

App Files Files Community

123 / app.py

Constance2222

Upload 7 files

542b1cf verified 22 days ago

raw

history blame contribute delete

15.1 kB


	# AI-Assisted Code — Academic Integrity Notice
	# Generated with The App Builder. ESCP coursework.
	# Student must be able to explain all code when asked.

	import json
	import os
	import shutil
	import time
	import traceback
	from pathlib import Path
	from typing import Any, Dict, List, Tuple

	import gradio as gr
	import pandas as pd
	import papermill as pm
	import plotly.graph_objects as go

	BASE_DIR = Path(__file__).resolve().parent
	NOTEBOOK_NAME = os.environ.get("NB_ANALYSIS", "2a_Python_Analysis_Constance_Gonnelle.ipynb").strip()

	# Input files shipped with the Space
	SALES_SOURCE = BASE_DIR / "books_data.csv"
	REVIEWS_SOURCE = BASE_DIR / "synthetic_book_reviews.csv"

	# Filenames expected by the notebook
	SALES_ALIAS = BASE_DIR / "synthetic_sales_data.csv"
	REVIEWS_ALIAS = BASE_DIR / "synthetic_book_reviews.csv"

	RUNS_DIR = BASE_DIR / "runs"
	ART_DIR = BASE_DIR / "artifacts"
	PY_FIG_DIR = ART_DIR / "py" / "figures"
	PY_TAB_DIR = ART_DIR / "py" / "tables"

	PAPERMILL_TIMEOUT = int(os.environ.get("PAPERMILL_TIMEOUT", "1800"))
	MAX_PREVIEW_ROWS = int(os.environ.get("MAX_FILE_PREVIEW_ROWS", "100"))
	MAX_LOG_CHARS = int(os.environ.get("MAX_LOG_CHARS", "8000"))


	def ensure_dirs() -> None:
	"""Create all folders used by the app."""
	for folder in [RUNS_DIR, ART_DIR, PY_FIG_DIR, PY_TAB_DIR]:
	folder.mkdir(parents=True, exist_ok=True)


	def stamp() -> str:
	"""Return a timestamp for run outputs."""
	return time.strftime("%Y%m%d-%H%M%S")


	def tail(text: str, limit: int = MAX_LOG_CHARS) -> str:
	"""Keep logs short enough for the UI."""
	return (text or "")[-limit:]


	def prepare_input_files() -> List[str]:
	"""
	Make sure the notebook sees the exact filenames it expects.
	This is like giving a spreadsheet the same tab names every time.
	"""
	messages = []
	if not SALES_SOURCE.exists():
	raise FileNotFoundError("books_data.csv is missing.")
	if not REVIEWS_SOURCE.exists():
	raise FileNotFoundError("synthetic_book_reviews.csv is missing.")
	if SALES_SOURCE.resolve() != SALES_ALIAS.resolve():
	shutil.copyfile(SALES_SOURCE, SALES_ALIAS)
	messages.append("Copied books_data.csv -> synthetic_sales_data.csv")
	else:
	messages.append("synthetic_book_reviews.csv already uses the expected filename")
	messages.append("synthetic_book_reviews.csv ready")
	return messages


	def artifacts_index() -> Dict[str, Any]:
	"""List the files created by the notebook."""
	return {
	"figures": sorted(
	p.name for p in PY_FIG_DIR.glob("*") if p.suffix.lower() in {".png", ".jpg", ".jpeg"}
	),
	"tables": sorted(
	p.name for p in PY_TAB_DIR.glob("*") if p.suffix.lower() in {".csv", ".json"}
	),
	}


	def load_json(path: Path) -> Dict[str, Any]:
	"""Read JSON safely."""
	with path.open(encoding="utf-8") as file:
	return json.load(file)


	def load_table(path: Path) -> pd.DataFrame:
	"""Load a CSV or JSON artifact for preview."""
	try:
	if path.suffix.lower() == ".json":
	data = load_json(path)
	if isinstance(data, dict):
	return pd.DataFrame([data])
	return pd.DataFrame(data)
	return pd.read_csv(path, nrows=MAX_PREVIEW_ROWS)
	except Exception as exc:
	return pd.DataFrame([{"error": str(exc)}])


	def run_notebook() -> str:
	"""Execute the student's analysis notebook with papermill."""
	ensure_dirs()
	notebook_path = BASE_DIR / NOTEBOOK_NAME
	if not notebook_path.exists():
	return f"ERROR: Notebook not found: {NOTEBOOK_NAME}"

	try:
	prep_messages = prepare_input_files()
	output_path = RUNS_DIR / f"run_{stamp()}_{NOTEBOOK_NAME}"
	pm.execute_notebook(
	input_path=str(notebook_path),
	output_path=str(output_path),
	cwd=str(BASE_DIR),
	log_output=True,
	progress_bar=False,
	request_save_on_cell_execute=True,
	execution_timeout=PAPERMILL_TIMEOUT,
	)
	idx = artifacts_index()
	figure_lines = [f" - {name}" for name in idx["figures"]] or [" - (none)"]
	table_lines = [f" - {name}" for name in idx["tables"]] or [" - (none)"]
	lines = [
	"Notebook executed successfully.",
	*prep_messages,
	"",
	f"Notebook: {NOTEBOOK_NAME}",
	f"Run file: {output_path.name}",
	"",
	"Generated figures:",
	*figure_lines,
	"",
	"Generated tables:",
	*table_lines,
	]
	return "\n".join(lines)
	except Exception as exc:
	return (
	f"FAILED: {exc}\n\n"
	f"{tail(traceback.format_exc(), 3000)}"
	)


	def load_kpis() -> Dict[str, Any]:
	"""Load the KPI summary exported by the notebook."""
	for candidate in [PY_FIG_DIR / "kpis.json", PY_TAB_DIR / "kpis.json"]:
	if candidate.exists():
	try:
	return load_json(candidate)
	except Exception:
	pass
	return {}


	def render_kpis() -> str:
	"""Build lightweight KPI cards without background images."""
	kpis = load_kpis()
	if not kpis:
	return """
	<div class="empty-state">
	<div class="empty-icon">📊</div>
	<div class="empty-title">No outputs yet</div>
	<div class="empty-text">Click “Run notebook” to generate figures, tables, and KPI cards.</div>
	</div>
	"""

	cards = []
	labels = {
	"n_titles": "Book Titles",
	"n_months": "Months",
	"total_units_sold": "Units Sold",
	"total_revenue": "Revenue",
	}
	for key, value in kpis.items():
	nice_label = labels.get(key, key.replace("_", " ").title())
	if isinstance(value, (int, float)) and abs(value) >= 1000:
	value = f"{value:,.0f}"
	cards.append(
	f"""
	<div class="kpi-card">
	<div class="kpi-label">{nice_label}</div>
	<div class="kpi-value">{value}</div>
	</div>
	"""
	)
	return f'<div class="kpi-grid">{"".join(cards)}</div>'


	def empty_chart(title: str) -> go.Figure:
	"""Return a placeholder chart."""
	fig = go.Figure()
	fig.update_layout(
	title=title,
	template="plotly_white",
	height=420,
	annotations=[
	dict(
	text="Run the notebook to populate this chart",
	x=0.5,
	y=0.5,
	xref="paper",
	yref="paper",
	showarrow=False,
	)
	],
	)
	return fig


	def build_sales_chart() -> go.Figure:
	"""Create an interactive monthly trend chart from df_dashboard.csv."""
	path = PY_TAB_DIR / "df_dashboard.csv"
	if not path.exists():
	return empty_chart("Monthly Overview")
	df = pd.read_csv(path)
	date_col = next((col for col in df.columns if "month" in col.lower() or "date" in col.lower()), None)
	metric_cols = [col for col in df.columns if col != date_col and pd.api.types.is_numeric_dtype(df[col])]
	if not date_col or not metric_cols:
	return empty_chart("Monthly Overview")
	df[date_col] = pd.to_datetime(df[date_col], errors="coerce")
	fig = go.Figure()
	for column in metric_cols:
	fig.add_trace(
	go.Scatter(
	x=df[date_col],
	y=df[column],
	mode="lines+markers",
	name=column.replace("_", " ").title(),
	hovertemplate=f"<b>{column.replace('_', ' ').title()}</b><br>%{{x\|%b %Y}}: %{{y:,.0f}}<extra></extra>",
	)
	)
	fig.update_layout(template="plotly_white", height=450, title="Monthly Overview", hovermode="x unified")
	return fig


	def build_top_sellers_chart() -> go.Figure:
	"""Create an interactive top-sellers bar chart."""
	path = PY_TAB_DIR / "top_titles_by_units_sold.csv"
	if not path.exists():
	return empty_chart("Top Selling Titles")
	df = pd.read_csv(path).head(15)
	title_col = next((col for col in df.columns if "title" in col.lower()), df.columns[0])
	value_col = next((col for col in df.columns if "unit" in col.lower() or "sold" in col.lower()), df.columns[-1])
	fig = go.Figure(
	go.Bar(
	x=df[value_col],
	y=df[title_col],
	orientation="h",
	hovertemplate="<b>%{y}</b><br>Units: %{x:,.0f}<extra></extra>",
	)
	)
	fig.update_layout(template="plotly_white", height=max(420, len(df) * 28), title="Top Selling Titles")
	fig.update_yaxes(autorange="reversed")
	return fig


	def build_sentiment_chart() -> go.Figure:
	"""Create an interactive sentiment chart."""
	path = PY_TAB_DIR / "sentiment_counts_sampled.csv"
	if not path.exists():
	return empty_chart("Sentiment Distribution")
	df = pd.read_csv(path)
	title_col = df.columns[0]
	sentiment_cols = [col for col in ["negative", "neutral", "positive"] if col in df.columns]
	if not sentiment_cols:
	return empty_chart("Sentiment Distribution")
	fig = go.Figure()
	for col in sentiment_cols:
	fig.add_trace(
	go.Bar(
	x=df[col],
	y=df[title_col],
	name=col.title(),
	orientation="h",
	hovertemplate=f"<b>{col.title()}</b>: %{{x}}<extra></extra>",
	)
	)
	fig.update_layout(
	template="plotly_white",
	barmode="stack",
	height=max(420, len(df) * 28),
	title="Sentiment Distribution by Book",
	)
	fig.update_yaxes(autorange="reversed")
	return fig


	def refresh_gallery() -> Tuple[List[Tuple[str, str]], gr.Dropdown, pd.DataFrame]:
	"""Refresh static figures and tables."""
	figures = [(str(path), path.stem.replace("_", " ").title()) for path in sorted(PY_FIG_DIR.glob("*.png"))]
	idx = artifacts_index()
	table_choices = idx["tables"]
	first_table = load_table(PY_TAB_DIR / table_choices[0]) if table_choices else pd.DataFrame()
	return figures, gr.update(choices=table_choices, value=table_choices[0] if table_choices else None), first_table


	def on_table_select(choice: str) -> pd.DataFrame:
	"""Load the selected table."""
	if not choice:
	return pd.DataFrame([{"hint": "Select a table above."}])
	path = PY_TAB_DIR / choice
	if not path.exists():
	return pd.DataFrame([{"error": f"File not found: {choice}"}])
	return load_table(path)


	def answer_question(user_msg: str, history: List[Dict[str, str]]):
	"""Simple assistant that routes the user to the right chart or table."""
	idx = artifacts_index()
	text = (user_msg or "").strip()
	if not text:
	return history, "", None, None

	msg = text.lower()
	reply = "Ask about sales trends, sentiment, top sellers, or dashboard overview."
	chart = None
	table = None

	if not idx["figures"] and not idx["tables"]:
	reply = "No artifacts exist yet. Run the notebook first in the first tab."
	elif any(word in msg for word in ["trend", "sales", "monthly", "forecast", "arima"]):
	reply = "Here is the monthly overview built from the notebook outputs."
	chart = build_sales_chart()
	elif any(word in msg for word in ["sentiment", "review", "positive", "negative"]):
	reply = "Here is the sentiment distribution across the sampled books."
	chart = build_sentiment_chart()
	elif any(word in msg for word in ["top", "best", "popular", "rank"]):
	reply = "Here is the top-sellers table exported by the notebook."
	path = PY_TAB_DIR / "top_titles_by_units_sold.csv"
	table = load_table(path) if path.exists() else pd.DataFrame([{"error": "top_titles_by_units_sold.csv not found"}])
	elif any(word in msg for word in ["overview", "dashboard", "kpi", "summary"]):
	reply = "Here is the dashboard overview table."
	path = PY_TAB_DIR / "df_dashboard.csv"
	table = load_table(path) if path.exists() else pd.DataFrame([{"error": "df_dashboard.csv not found"}])

	new_history = (history or []) + [
	{"role": "user", "content": text},
	{"role": "assistant", "content": reply},
	]
	return new_history, "", chart, table


	def refresh_dashboard():
	"""Refresh all dashboard widgets together."""
	kpis = render_kpis()
	sales = build_sales_chart()
	sentiment = build_sentiment_chart()
	top = build_top_sellers_chart()
	figures, dropdown, table = refresh_gallery()
	return kpis, sales, sentiment, top, figures, dropdown, table


	ensure_dirs()

	with gr.Blocks(title="Book Analytics Space", css_paths=["style.css"]) as demo:
	gr.Markdown(
	"# Book Analytics Space\n"
	"This Gradio app runs your notebook on the two CSV datasets and then exposes the exported outputs."
	)

	with gr.Tab("1. Run Notebook"):
	gr.Markdown(
	"Use this tab to execute the notebook on the packaged datasets. "
	"The app automatically maps `books_data.csv` to the filename expected by the notebook."
	)
	run_btn = gr.Button("Run notebook", variant="primary")
	run_log = gr.Textbox(label="Execution log", lines=20, interactive=False)
	run_btn.click(run_notebook, outputs=run_log)

	with gr.Tab("2. Dashboard"):
	kpi_html = gr.HTML(value=render_kpis())
	refresh_btn = gr.Button("Refresh dashboard", variant="secondary")
	sales_plot = gr.Plot(label="Monthly Overview")
	sentiment_plot = gr.Plot(label="Sentiment Distribution")
	top_plot = gr.Plot(label="Top Sellers")
	gallery = gr.Gallery(label="Static Figures", columns=2, height=420, object_fit="contain")
	table_dropdown = gr.Dropdown(label="Tables", choices=[], interactive=True)
	table_display = gr.Dataframe(label="Table Preview", interactive=False)
	refresh_btn.click(
	refresh_dashboard,
	outputs=[kpi_html, sales_plot, sentiment_plot, top_plot, gallery, table_dropdown, table_display],
	)
	table_dropdown.change(on_table_select, inputs=table_dropdown, outputs=table_display)

	with gr.Tab("3. AI Dashboard"):
	gr.Markdown("Ask a simple question and the app will show the relevant chart or table.")
	chatbot = gr.Chatbot(type="messages", height=360)
	user_input = gr.Textbox(
	label="Ask about your data",
	placeholder="Example: Show me the sales trends",
	)
	ai_chart = gr.Plot(label="Interactive Chart")
	ai_table = gr.Dataframe(label="Data Table", interactive=False)
	gr.Examples(
	examples=[
	"Show me the sales trends",
	"What does the sentiment look like?",
	"Which titles sell the most?",
	"Give me a dashboard overview",
	],
	inputs=user_input,
	)
	user_input.submit(
	answer_question,
	inputs=[user_input, chatbot],
	outputs=[chatbot, user_input, ai_chart, ai_table],
	)

	demo.launch()