Yulu1 committed on
Commit
e65f8e9
·
verified ·
1 Parent(s): d8163c4

Upload 10 files

Browse files
.gitattributes ADDED
@@ -0,0 +1 @@
 
 
1
+ background_top.png filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10-slim
2
+
3
+ ENV DEBIAN_FRONTEND=noninteractive
4
+ ENV PYTHONDONTWRITEBYTECODE=1
5
+ ENV PYTHONUNBUFFERED=1
6
+
7
+ ENV GRADIO_SERVER_NAME=0.0.0.0
8
+ ENV GRADIO_SERVER_PORT=7860
9
+
10
+ # System deps: R + compilers + common R pkg build deps
11
+ RUN apt-get update && apt-get install -y --no-install-recommends \
12
+ r-base \
13
+ r-base-dev \
14
+ build-essential \
15
+ curl \
16
+ git \
17
+ libcurl4-openssl-dev \
18
+ libssl-dev \
19
+ libxml2-dev \
20
+ && rm -rf /var/lib/apt/lists/*
21
+
22
+ # Install required R packages
23
+ RUN R -e "install.packages(c('forecast','ggplot2','jsonlite','readr','dplyr','tidyr','stringr','lubridate','broom'), repos='https://cloud.r-project.org')"
24
+
25
+ WORKDIR /app
26
+ COPY . /app
27
+
28
+ # Python deps (from requirements.txt)
29
+ RUN pip install --no-cache-dir -r requirements.txt
30
+
31
+ # Notebook execution deps
32
+ RUN pip install --no-cache-dir notebook ipykernel papermill
33
+
34
+ # Pre-install packages that the notebooks install via !pip install
35
+ # so papermill doesn't waste time or fail on them at runtime:
36
+ # datacreation.ipynb: beautifulsoup4 pandas matplotlib seaborn numpy textblob
37
+ # pythonanalysis.ipynb: pandas matplotlib seaborn numpy textblob faker transformers vaderSentiment
38
+ # Most are already in requirements.txt; add the extras:
39
+ RUN pip install --no-cache-dir textblob faker transformers
40
+
41
+ RUN python -m ipykernel install --user --name python3 --display-name "Python 3"
42
+
43
+ # R deps for notebook execution via papermill (IRkernel)
44
+ RUN R -e "install.packages('IRkernel', repos='https://cloud.r-project.org/')"
45
+ RUN R -e "IRkernel::installspec(user = FALSE)"
46
+
47
+ EXPOSE 7860
48
+
49
+ CMD ["python", "app.py"]
README.md ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: RX12WorkshopApp
3
+ emoji: 📊
4
+ colorFrom: blue
5
+ colorTo: red
6
+ sdk: docker
7
+ pinned: false
8
+ ---
9
+
10
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,621 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import json
4
+ import time
5
+ import traceback
6
+ from pathlib import Path
7
+ from typing import Dict, Any, List, Optional, Tuple
8
+
9
+ import pandas as pd
10
+ import gradio as gr
11
+ import papermill as pm
12
+
13
+ # Optional LLM (HuggingFace Inference API)
14
+ try:
15
+ from huggingface_hub import InferenceClient
16
+ except Exception:
17
+ InferenceClient = None
18
+
19
+ # =========================================================
20
+ # CONFIG
21
+ # =========================================================
22
+
23
+ BASE_DIR = Path(__file__).resolve().parent
24
+
25
# Notebook file names for the three pipeline steps, in execution order.
# Each one can be overridden through the environment variable of the same name.
# NB3 is read by run_r(); it was previously undefined, and the NB1/NB2 defaults
# were shifted by one step relative to the runners that use them.
NB1 = os.environ.get("NB1", "datacreation.ipynb").strip()    # step 1: data creation
NB2 = os.environ.get("NB2", "pythonanalysis.ipynb").strip()  # step 2a: Python analysis
NB3 = os.environ.get("NB3", "ranalysis.ipynb").strip()       # step 2b: R analysis
27
+
28
+ RUNS_DIR = BASE_DIR / "runs"
29
+ ART_DIR = BASE_DIR / "artifacts"
30
+ PY_FIG_DIR = ART_DIR / "py" / "figures"
31
+ PY_TAB_DIR = ART_DIR / "py" / "tables"
32
+ R_FIG_DIR = ART_DIR / "r" / "figures"
33
+ R_TAB_DIR = ART_DIR / "r" / "tables"
34
+
35
+ PAPERMILL_TIMEOUT = int(os.environ.get("PAPERMILL_TIMEOUT", "1800"))
36
+ MAX_PREVIEW_ROWS = int(os.environ.get("MAX_FILE_PREVIEW_ROWS", "50"))
37
+ MAX_LOG_CHARS = int(os.environ.get("MAX_LOG_CHARS", "8000"))
38
+
39
+ HF_API_KEY = os.environ.get("HF_API_KEY", "").strip()
40
+ MODEL_NAME = os.environ.get("MODEL_NAME", "deepseek-ai/DeepSeek-R1").strip()
41
+ HF_PROVIDER = os.environ.get("HF_PROVIDER", "novita").strip()
42
+
43
+ LLM_ENABLED = bool(HF_API_KEY) and InferenceClient is not None
44
+ llm_client = (
45
+ InferenceClient(provider=HF_PROVIDER, api_key=HF_API_KEY)
46
+ if LLM_ENABLED
47
+ else None
48
+ )
49
+
50
+ # =========================================================
51
+ # HELPERS
52
+ # =========================================================
53
+
54
def ensure_dirs():
    """Create the run-output and artifact directories if they do not exist yet."""
    required = (RUNS_DIR, ART_DIR, PY_FIG_DIR, PY_TAB_DIR, R_FIG_DIR, R_TAB_DIR)
    for folder in required:
        # parents=True builds intermediate folders; exist_ok=True makes this idempotent.
        folder.mkdir(parents=True, exist_ok=True)
57
+
58
def stamp():
    """Return the current local time formatted as ``YYYYMMDD-HHMMSS``."""
    now = time.localtime()
    return time.strftime("%Y%m%d-%H%M%S", now)
60
+
61
def tail(text: str, n: int = MAX_LOG_CHARS) -> str:
    """Return at most the last ``n`` characters of ``text``.

    Args:
        text: The text to truncate; ``None`` or ``""`` yields ``""``.
        n: Maximum number of trailing characters to keep.

    Returns:
        The trailing slice of ``text``, or ``""`` when ``n`` is not positive.
    """
    # A plain text[-n:] returns the WHOLE string for n == 0 and drops the head
    # (instead of keeping a tail) for negative n, so handle those explicitly.
    if n <= 0:
        return ""
    return (text or "")[-n:]
63
+
64
+ def _ls(dir_path: Path, exts: Tuple[str, ...]) -> List[str]:
65
+ if not dir_path.is_dir():
66
+ return []
67
+ return sorted(p.name for p in dir_path.iterdir() if p.is_file() and p.suffix.lower() in exts)
68
+
69
def _read_csv(path: Path) -> pd.DataFrame:
    """Load a preview of the CSV at ``path``: at most MAX_PREVIEW_ROWS data rows."""
    preview = pd.read_csv(path, nrows=MAX_PREVIEW_ROWS)
    return preview
71
+
72
+ def _read_json(path: Path):
73
+ with path.open(encoding="utf-8") as f:
74
+ return json.load(f)
75
+
76
def artifacts_index() -> Dict[str, Any]:
    """Build a listing of the artifact files currently on disk.

    Returns:
        ``{"python": {"figures": [...], "tables": [...]}, "r": {...}}`` where each
        list holds sorted file names found in the matching artifact directory.
    """
    image_exts = (".png", ".jpg", ".jpeg")
    table_exts = (".csv", ".json")
    layout = (
        ("python", PY_FIG_DIR, PY_TAB_DIR),
        ("r", R_FIG_DIR, R_TAB_DIR),
    )
    return {
        scope: {
            "figures": _ls(fig_dir, image_exts),
            "tables": _ls(tab_dir, table_exts),
        }
        for scope, fig_dir, tab_dir in layout
    }
87
+
88
+ # =========================================================
89
+ # PIPELINE RUNNERS
90
+ # =========================================================
91
+
92
def run_notebook(nb_name: str) -> str:
    """Execute a notebook with papermill and save the executed copy under RUNS_DIR.

    Args:
        nb_name: Notebook path relative to BASE_DIR.

    Returns:
        A short confirmation message, ``"Executed <nb_name>"``.

    Raises:
        FileNotFoundError: If ``nb_name`` does not point to a file. This used to
            be returned as an "ERROR: ..." string, which the step runners then
            reported with an "OK" prefix; raising lets their ``except`` branch
            report the failure correctly.
        Exception: Any error raised by ``pm.execute_notebook`` propagates unchanged.
    """
    ensure_dirs()
    nb_in = BASE_DIR / nb_name
    # is_file() (not exists()) so an empty name, which resolves to BASE_DIR
    # itself, is rejected instead of being handed to papermill.
    if not nb_in.is_file():
        raise FileNotFoundError(f"{nb_name} not found.")
    # Use only the file name so a notebook given with a sub-path still produces
    # an output file directly inside RUNS_DIR.
    nb_out = RUNS_DIR / f"run_{stamp()}_{Path(nb_name).name}"
    pm.execute_notebook(
        input_path=str(nb_in),
        output_path=str(nb_out),
        cwd=str(BASE_DIR),
        log_output=True,
        progress_bar=False,
        request_save_on_cell_execute=True,
        execution_timeout=PAPERMILL_TIMEOUT,
    )
    return f"Executed {nb_name}"
108
+
109
+
110
def run_datacreation() -> str:
    """Run notebook NB1 and report the CSV files found in BASE_DIR afterwards.

    Never raises: any exception is converted into a "FAILED ..." message that
    ends with the last 2000 characters of the traceback.
    """
    try:
        outcome = run_notebook(NB1)
        csv_names = sorted(entry.name for entry in BASE_DIR.glob("*.csv"))
        listing = "\n".join(f" - {name}" for name in csv_names)
        return f"OK {outcome}\n\nCSVs now in /app:\n" + listing
    except Exception as e:
        trace_tail = traceback.format_exc()[-2000:]
        return f"FAILED {e}\n\n{trace_tail}"
117
+
118
+
119
def run_pythonanalysis() -> str:
    """Run notebook NB2 and summarise the Python figures and tables on disk.

    Never raises: any exception is converted into a "FAILED ..." message that
    ends with the last 2000 characters of the traceback.
    """
    try:
        outcome = run_notebook(NB2)
        py_artifacts = artifacts_index()["python"]
        fig_list = ", ".join(py_artifacts["figures"]) or "(none)"
        tab_list = ", ".join(py_artifacts["tables"]) or "(none)"
        return f"OK {outcome}\n\nFigures: {fig_list}\nTables: {tab_list}"
    except Exception as e:
        trace_tail = traceback.format_exc()[-2000:]
        return f"FAILED {e}\n\n{trace_tail}"
132
+
133
+
134
def run_r() -> str:
    """Run notebook NB3 and summarise the R figures and tables on disk.

    Never raises: any exception is converted into a "FAILED ..." message that
    ends with the last 2000 characters of the traceback.
    """
    try:
        # NOTE(review): NB3 must be defined at module level next to NB1/NB2.
        # If it is missing, the resulting NameError is caught below and this
        # step always reports FAILED.
        outcome = run_notebook(NB3)
        r_artifacts = artifacts_index()["r"]
        fig_list = ", ".join(r_artifacts["figures"]) or "(none)"
        tab_list = ", ".join(r_artifacts["tables"]) or "(none)"
        return f"OK {outcome}\n\nFigures: {fig_list}\nTables: {tab_list}"
    except Exception as e:
        trace_tail = traceback.format_exc()[-2000:]
        return f"FAILED {e}\n\n{trace_tail}"
147
+
148
+
149
def run_full_pipeline() -> str:
    """Run the three pipeline steps in order and return one combined log.

    Each step contributes a banner (rule, title, rule) followed by that step's
    own report; steps are separated by a blank line. A failing step does not
    stop the following ones, because each runner returns its error as text.
    """
    steps = (
        ("STEP 1/3: Data Creation (web scraping + synthetic data)", run_datacreation),
        ("STEP 2/3: Python Analysis (sentiment, ARIMA, dashboard)", run_pythonanalysis),
        ("STEP 3/3: R Analysis (ETS/ARIMA forecasting)", run_r),
    )
    rule = "=" * 50
    lines = []
    for position, (title, runner) in enumerate(steps):
        if position:
            lines.append("")  # blank separator between steps
        lines.extend([rule, title, rule])
        lines.append(runner())
    return "\n".join(lines)
166
+
167
+
168
+ # =========================================================
169
+ # GALLERY LOADERS
170
+ # =========================================================
171
+
172
def _load_all_figures() -> List[Tuple[str, str]]:
    """Return ``(filepath, caption)`` pairs for the gallery, Python figures first.

    The same extension set as ``artifacts_index`` is used (.png/.jpg/.jpeg,
    matched case-insensitively via ``_ls``), so every figure the index reports
    also appears in the gallery. Previously only ``*.png`` files were shown.

    Returns:
        A list of ``(absolute-or-relative path as str, "<Scope> | <Title>")``
        tuples; empty when no figure exists.
    """
    image_exts = (".png", ".jpg", ".jpeg")
    items: List[Tuple[str, str]] = []
    for label, fig_dir in (("Python", PY_FIG_DIR), ("R", R_FIG_DIR)):
        # _ls returns sorted names and tolerates a missing directory.
        for name in _ls(fig_dir, image_exts):
            title = Path(name).stem.replace("_", " ").title()
            items.append((str(fig_dir / name), f"{label} | {title}"))
    return items
180
+
181
+
182
def _load_table_safe(path: Path) -> pd.DataFrame:
    """Load a CSV or JSON table preview without ever raising.

    Args:
        path: File to load. A ``.json`` suffix (any letter case, matching how
            ``_ls`` lists files) is parsed as JSON; anything else is read as CSV.

    Returns:
        The table as a DataFrame, limited to MAX_PREVIEW_ROWS rows. A JSON
        object becomes a single-row frame. On any error a one-row frame with an
        ``error`` column holding the exception text is returned instead.
    """
    try:
        if path.suffix.lower() == ".json":
            obj = _read_json(path)
            if isinstance(obj, dict):
                return pd.DataFrame([obj])
            # Cap JSON arrays the same way _read_csv caps CSV files.
            return pd.DataFrame(obj).head(MAX_PREVIEW_ROWS)
        return _read_csv(path)
    except Exception as e:
        # Deliberately broad: the UI shows the message instead of crashing.
        return pd.DataFrame([{"error": str(e)}])
192
+
193
+
194
def refresh_gallery():
    """Reload figures and table choices for the Results Gallery tab.

    Returns:
        A 3-tuple: the gallery items, a ``gr.update`` that sets the dropdown
        choices and selects the first one (or ``None``), and a preview
        DataFrame of that first table (empty when there are no tables).
    """
    figures = _load_all_figures()
    index = artifacts_index()

    # Choices are "<scope>/<file name>", Python tables first.
    table_choices = [
        f"{scope}/{name}"
        for scope in ("python", "r")
        for name in index[scope]["tables"]
    ]

    if table_choices:
        first_choice = table_choices[0]
        scope, _, filename = first_choice.partition("/")
        table_dir = R_TAB_DIR if scope != "python" else PY_TAB_DIR
        preview = _load_table_safe(table_dir / filename)
    else:
        first_choice = None
        preview = pd.DataFrame()

    return (
        figures or [],
        gr.update(choices=table_choices, value=first_choice),
        preview,
    )
217
+
218
+
219
def on_table_select(choice: str):
    """Load the table chosen in the gallery dropdown.

    Args:
        choice: A ``"<scope>/<file name>"`` string where scope is ``python`` or ``r``.

    Returns:
        The table preview as a DataFrame, or a one-row DataFrame carrying a
        ``hint`` or ``error`` message when the choice cannot be resolved.
    """
    if not choice or "/" not in choice:
        return pd.DataFrame([{"hint": "Select a table above."}])
    scope, name = choice.split("/", 1)
    base = {"python": PY_TAB_DIR, "r": R_TAB_DIR}.get(scope)
    if not base:
        return pd.DataFrame([{"error": f"Unknown scope: {scope}"}])
    # The value arrives from the browser, so accept a bare file name only:
    # anything with a directory component could escape the tables folder.
    if Path(name).name != name:
        return pd.DataFrame([{"error": f"Invalid table name: {name}"}])
    path = base / name
    # is_file() also rejects "" and "..", which resolve to directories.
    if not path.is_file():
        return pd.DataFrame([{"error": f"File not found: {path}"}])
    return _load_table_safe(path)
230
+
231
+
232
+ # =========================================================
233
+ # KPI LOADER
234
+ # =========================================================
235
+
236
def load_kpis() -> Dict[str, Any]:
    """Load the KPI summary written by the Python notebook, if any.

    Looks for ``kpis.json`` in the Python tables directory first, then in the
    Python figures directory.

    Returns:
        The first candidate that parses to a JSON object, or ``{}`` when none
        does. Callers use ``.get`` on the result, so JSON that decodes to a
        list or scalar is skipped rather than returned.
    """
    for candidate in (PY_TAB_DIR / "kpis.json", PY_FIG_DIR / "kpis.json"):
        if not candidate.is_file():
            continue
        try:
            data = _read_json(candidate)
        except Exception:
            # Best effort: an unreadable or malformed file falls through to
            # the next candidate instead of breaking the dashboard.
            continue
        if isinstance(data, dict):
            return data
    return {}
244
+
245
+
246
+ # =========================================================
247
+ # AI DASHBOARD (Tab 3) -- LLM picks what to display
248
+ # =========================================================
249
+
250
+ DASHBOARD_SYSTEM = """You are an AI dashboard assistant for a book-sales analytics app.
251
+ The user asks questions or requests about their data. You have access to pre-computed
252
+ artifacts from Python and R analysis pipelines.
253
+
254
+ AVAILABLE ARTIFACTS (only reference ones that exist):
255
+ {artifacts_json}
256
+
257
+ KPI SUMMARY: {kpis_json}
258
+
259
+ YOUR JOB:
260
+ 1. Answer the user's question conversationally using the KPIs and your knowledge of the artifacts.
261
+ 2. At the END of your response, output a JSON block (fenced with ```json ... ```) that tells
262
+ the dashboard which artifact to display. The JSON must have this shape:
263
+ {{"show": "figure"|"table"|"none", "scope": "python"|"r", "filename": "..."}}
264
+
265
+ - Use "show": "figure" to display a chart image.
266
+ - Use "show": "table" to display a CSV/JSON table.
267
+ - Use "show": "none" if no artifact is relevant.
268
+
269
+ RULES:
270
+ - If the user asks about sales trends or forecasting by title, show sales_trends or arima figures.
271
+ - If the user asks about sentiment, show sentiment figure or sentiment_counts table.
272
+ - If the user asks about R regression, the R notebook focuses on forecasting, show accuracy_table.csv.
273
+ - If the user asks about forecast accuracy or model comparison, show accuracy_table.csv or forecast_compare.png.
274
+ - If the user asks about top sellers, show top_titles_by_units_sold.csv.
275
+ - If the user asks a general data question, pick the most relevant artifact.
276
+ - Keep your answer concise (2-4 sentences), then the JSON block.
277
+ """
278
+
279
+ JSON_BLOCK_RE = re.compile(r"```json\s*(\{.*?\})\s*```", re.DOTALL)
280
+ FALLBACK_JSON_RE = re.compile(r"\{[^{}]*\"show\"[^{}]*\}", re.DOTALL)
281
+
282
+
283
+ def _parse_display_directive(text: str) -> Dict[str, str]:
284
+ m = JSON_BLOCK_RE.search(text)
285
+ if m:
286
+ try:
287
+ return json.loads(m.group(1))
288
+ except json.JSONDecodeError:
289
+ pass
290
+ m = FALLBACK_JSON_RE.search(text)
291
+ if m:
292
+ try:
293
+ return json.loads(m.group(0))
294
+ except json.JSONDecodeError:
295
+ pass
296
+ return {"show": "none"}
297
+
298
+
299
def _clean_response(text: str) -> str:
    """Strip the display directive from the text shown to the user.

    Removes fenced ```json blocks and also bare ``{... "show" ...}`` objects.
    The directive parser accepts the bare form as a fallback, so leaving it in
    place would show raw JSON in the chat whenever the model omits the fence.

    Args:
        text: Raw model output; ``None`` is treated as ``""``.

    Returns:
        The remaining text with surrounding whitespace removed.
    """
    without_fenced = JSON_BLOCK_RE.sub("", text or "")
    return FALLBACK_JSON_RE.sub("", without_fenced).strip()
302
+
303
+
304
def _history_text(content) -> str:
    """Flatten one chat-history ``content`` value to plain text.

    A string is returned unchanged. A list is assumed to hold
    ``{"type": "text", "text": ...}`` style parts (TODO confirm against the
    installed Gradio version); the text parts are joined with newlines.
    Anything else yields ``""``.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        parts = [
            part.get("text", "")
            for part in content
            if isinstance(part, dict) and isinstance(part.get("text"), str)
        ]
        return "\n".join(parts)
    return ""


def ai_chat(user_msg: str, history: list):
    """Answer one chat turn on the AI Dashboard tab.

    Uses the LLM when it is enabled, otherwise (or when the LLM call fails)
    the keyword matcher, then resolves the artifact the reply asks to display.

    Args:
        user_msg: The text the user submitted.
        history: Prior conversation as a list of role/content dicts, or None.

    Returns:
        A 4-tuple ``(history, "", figure_path_or_None, table_df_or_None)``;
        the empty string clears the input box. A blank ``user_msg`` returns
        the history unchanged.
    """
    if not user_msg or not user_msg.strip():
        return history, "", None, None

    idx = artifacts_index()
    kpis = load_kpis()

    if not LLM_ENABLED:
        reply, directive = _keyword_fallback(user_msg, idx, kpis)
    else:
        system = DASHBOARD_SYSTEM.format(
            artifacts_json=json.dumps(idx, indent=2),
            kpis_json=json.dumps(kpis, indent=2) if kpis else "(no KPIs yet, run the pipeline first)",
        )
        msgs = [{"role": "system", "content": system}]
        for entry in (history or [])[-6:]:
            # Forward role and plain-text content only, so any extra keys the
            # UI attaches to history entries are never sent to the API.
            if not isinstance(entry, dict):
                continue
            text = _history_text(entry.get("content"))
            if text:
                msgs.append({"role": entry.get("role", "user"), "content": text})
        msgs.append({"role": "user", "content": user_msg})

        try:
            r = llm_client.chat_completion(
                model=MODEL_NAME,
                messages=msgs,
                temperature=0.3,
                max_tokens=600,
                stream=False,
            )
            raw = (
                r["choices"][0]["message"]["content"]
                if isinstance(r, dict)
                else r.choices[0].message.content
            )
            if not raw:
                # Route an empty completion through the fallback below with a
                # readable reason instead of an incidental TypeError.
                raise ValueError("empty response from model")
            directive = _parse_display_directive(raw)
            reply = _clean_response(raw)
        except Exception as e:
            reply = f"LLM error: {e}. Falling back to keyword matching."
            reply_fb, directive = _keyword_fallback(user_msg, idx, kpis)
            reply += "\n\n" + reply_fb

    # Resolve artifact paths
    fig_out = None
    tab_out = None
    show = directive.get("show", "none")
    scope = directive.get("scope", "")
    fname = directive.get("filename", "")

    dirs = {
        "figure": {"python": PY_FIG_DIR, "r": R_FIG_DIR},
        "table": {"python": PY_TAB_DIR, "r": R_TAB_DIR},
    }
    index_key = {"figure": "figures", "table": "tables"}

    if isinstance(show, str) and show in dirs and scope and fname:
        base = dirs[show].get(scope) if isinstance(scope, str) else None
        # The file name comes from model output, which user text can steer.
        # Serve it only if it is one of the indexed artifacts, so a value such
        # as "../../secret" can never be joined onto the artifact directory.
        listed = base is not None and fname in idx[scope][index_key[show]]
        if listed and (base / fname).is_file():
            if show == "figure":
                fig_out = str(base / fname)
            else:
                tab_out = _load_table_safe(base / fname)
        else:
            reply += f"\n\n*(Could not find {show}: {scope}/{fname})*"

    new_history = (history or []) + [
        {"role": "user", "content": user_msg},
        {"role": "assistant", "content": reply},
    ]

    return new_history, "", fig_out, tab_out
371
+
372
+
373
+ def _keyword_fallback(msg: str, idx: Dict, kpis: Dict) -> Tuple[str, Dict]:
374
+ """Simple keyword matcher when LLM is unavailable."""
375
+ msg_lower = msg.lower()
376
+
377
+ if not any(idx[s]["figures"] or idx[s]["tables"] for s in ("python", "r")):
378
+ return (
379
+ "No artifacts found yet. Please run the pipeline first (Tab 1), "
380
+ "then come back here to explore the results.",
381
+ {"show": "none"},
382
+ )
383
+
384
+ kpi_text = ""
385
+ if kpis:
386
+ total = kpis.get("total_units_sold", 0)
387
+ kpi_text = (
388
+ f"Quick summary: **{kpis.get('n_titles', '?')}** book titles across "
389
+ f"**{kpis.get('n_months', '?')}** months, with **{total:,.0f}** total units sold."
390
+ )
391
+
392
+ if any(w in msg_lower for w in ["trend", "sales trend", "monthly sale"]):
393
+ return (
394
+ f"Here are the sales trends for sampled titles. {kpi_text}",
395
+ {"show": "figure", "scope": "python", "filename": "sales_trends_sampled_titles.png"},
396
+ )
397
+
398
+ if any(w in msg_lower for w in ["sentiment", "review", "positive", "negative"]):
399
+ return (
400
+ f"Here is the sentiment distribution across sampled book titles. {kpi_text}",
401
+ {"show": "figure", "scope": "python", "filename": "sentiment_distribution_sampled_titles.png"},
402
+ )
403
+
404
+ if any(w in msg_lower for w in ["arima", "forecast", "predict"]):
405
+ if "compar" in msg_lower or "ets" in msg_lower or "accuracy" in msg_lower:
406
+ if "forecast_compare.png" in idx.get("r", {}).get("figures", []):
407
+ return (
408
+ "Here is the ARIMA+Fourier vs ETS forecast comparison from the R analysis.",
409
+ {"show": "figure", "scope": "r", "filename": "forecast_compare.png"},
410
+ )
411
+ return (
412
+ f"Here are the ARIMA forecasts for sampled titles from the Python analysis. {kpi_text}",
413
+ {"show": "figure", "scope": "python", "filename": "arima_forecasts_sampled_titles.png"},
414
+ )
415
+
416
+ if any(w in msg_lower for w in ["regression", "lm", "coefficient", "price effect", "rating effect"]):
417
+ return (
418
+ "The R notebook focuses on forecasting rather than regression. "
419
+ "Here is the forecast accuracy comparison instead.",
420
+ {"show": "table", "scope": "r", "filename": "accuracy_table.csv"},
421
+ )
422
+
423
+ if any(w in msg_lower for w in ["top", "best sell", "popular", "rank"]):
424
+ return (
425
+ f"Here are the top-selling titles by units sold. {kpi_text}",
426
+ {"show": "table", "scope": "python", "filename": "top_titles_by_units_sold.csv"},
427
+ )
428
+
429
+ if any(w in msg_lower for w in ["accuracy", "benchmark", "rmse", "mape"]):
430
+ return (
431
+ "Here is the forecast accuracy comparison (ARIMA+Fourier vs ETS) from the R analysis.",
432
+ {"show": "table", "scope": "r", "filename": "accuracy_table.csv"},
433
+ )
434
+
435
+ if any(w in msg_lower for w in ["r analysis", "r output", "r result"]):
436
+ if "forecast_compare.png" in idx.get("r", {}).get("figures", []):
437
+ return (
438
+ "Here is the main R output: forecast model comparison plot.",
439
+ {"show": "figure", "scope": "r", "filename": "forecast_compare.png"},
440
+ )
441
+
442
+ if any(w in msg_lower for w in ["dashboard", "overview", "summary", "kpi"]):
443
+ return (
444
+ f"Dashboard overview: {kpi_text}\n\nAsk me about sales trends, sentiment, forecasts, "
445
+ "forecast accuracy, or top sellers to see specific visualizations.",
446
+ {"show": "table", "scope": "python", "filename": "df_dashboard.csv"},
447
+ )
448
+
449
+ # Default
450
+ return (
451
+ f"I can show you various analyses. {kpi_text}\n\n"
452
+ "Try asking about: **sales trends**, **sentiment**, **ARIMA forecasts**, "
453
+ "**forecast accuracy**, **top sellers**, or **dashboard overview**.",
454
+ {"show": "none"},
455
+ )
456
+
457
+
458
+ # =========================================================
459
+ # UI
460
+ # =========================================================
461
+
462
+ ensure_dirs()
463
+
464
def load_css() -> str:
    """Return the text of ``style.css`` in BASE_DIR, or ``""`` when it is absent."""
    stylesheet = BASE_DIR / "style.css"
    if not stylesheet.exists():
        return ""
    return stylesheet.read_text(encoding="utf-8")
467
+
468
+
469
+ with gr.Blocks(title="RX12 Workshop App") as demo:
470
+
471
+ gr.Markdown(
472
+ "# RX12 - Intro to Python and R - Workshop App\n"
473
+ "*The app to integrate the three notebooks in to get a functioning blueprint of the group project's final product*",
474
+ elem_id="escp_title",
475
+ )
476
+
477
+ # ===========================================================
478
+ # TAB 1 -- Pipeline Runner
479
+ # ===========================================================
480
+ with gr.Tab("Pipeline Runner"):
481
+ gr.Markdown(
482
+ )
483
+
484
+ with gr.Row():
485
+ with gr.Column(scale=1):
486
+ btn_nb1 = gr.Button(
487
+ "Step 1: Data Creation",
488
+ variant="secondary",
489
+ )
490
+ gr.Markdown(
491
+ )
492
+ with gr.Column(scale=1):
493
+ btn_nb2 = gr.Button(
494
+ "Step 2a: Python Analysis",
495
+ variant="secondary",
496
+ )
497
+ gr.Markdown(
498
+ )
499
+ with gr.Column(scale=1):
500
+ btn_r = gr.Button(
501
+ "Step 2b: R Analysis",
502
+ variant="secondary",
503
+ )
504
+ gr.Markdown(
505
+ )
506
+
507
+ with gr.Row():
508
+ btn_all = gr.Button(
509
+ "Run All 3 Steps",
510
+ variant="primary",
511
+ )
512
+
513
+ run_log = gr.Textbox(
514
+ label="Execution Log",
515
+ lines=18,
516
+ max_lines=30,
517
+ interactive=False,
518
+ )
519
+
520
+ btn_nb1.click(run_datacreation, outputs=[run_log])
521
+ btn_nb2.click(run_pythonanalysis, outputs=[run_log])
522
+ btn_r.click(run_r, outputs=[run_log])
523
+ btn_all.click(run_full_pipeline, outputs=[run_log])
524
+
525
+ # ===========================================================
526
+ # TAB 2 -- Results Gallery
527
+ # ===========================================================
528
+ with gr.Tab("Results Gallery"):
529
+ gr.Markdown(
530
+ "### All generated artifacts\n\n"
531
+ "After running the pipeline, click **Refresh** to load all figures and tables. "
532
+ "Figures are shown in the gallery; select a table from the dropdown to inspect it."
533
+ )
534
+
535
+ refresh_btn = gr.Button("Refresh Gallery", variant="primary")
536
+
537
+ gr.Markdown("#### Figures")
538
+ gallery = gr.Gallery(
539
+ label="All Figures (Python + R)",
540
+ columns=2,
541
+ height=480,
542
+ object_fit="contain",
543
+ )
544
+
545
+ gr.Markdown("#### Tables")
546
+ table_dropdown = gr.Dropdown(
547
+ label="Select a table to view",
548
+ choices=[],
549
+ interactive=True,
550
+ )
551
+ table_display = gr.Dataframe(
552
+ label="Table Preview",
553
+ interactive=False,
554
+ )
555
+
556
+ refresh_btn.click(
557
+ refresh_gallery,
558
+ outputs=[gallery, table_dropdown, table_display],
559
+ )
560
+ table_dropdown.change(
561
+ on_table_select,
562
+ inputs=[table_dropdown],
563
+ outputs=[table_display],
564
+ )
565
+
566
+ # ===========================================================
567
+ # TAB 3 -- AI Dashboard
568
+ # ===========================================================
569
+ with gr.Tab('"AI" Dashboard'):
570
+ gr.Markdown(
571
+ "### Ask questions, get visualisations\n\n"
572
+ "Describe what you want to see and the AI will pick the right chart or table. "
573
+ + (
574
+ "*LLM is active.*"
575
+ if LLM_ENABLED
576
+ else "*No API key detected \u2014 using keyword matching. "
577
+ "Set `HF_API_KEY` in Space secrets for full LLM support.*"
578
+ )
579
+ )
580
+
581
+ with gr.Row(equal_height=True):
582
+ with gr.Column(scale=1):
583
+ chatbot = gr.Chatbot(
584
+ label="Conversation",
585
+ height=380,
586
+ )
587
+ user_input = gr.Textbox(
588
+ label="Ask about your data",
589
+ placeholder="e.g. Show me sales trends / What drives revenue? / Compare forecast models",
590
+ lines=1,
591
+ )
592
+ gr.Examples(
593
+ examples=[
594
+ "Show me the sales trends",
595
+ "What does the sentiment look like?",
596
+ "Which titles sell the most?",
597
+ "Show the forecast accuracy comparison",
598
+ "Compare the ARIMA and ETS forecasts",
599
+ "Give me a dashboard overview",
600
+ ],
601
+ inputs=user_input,
602
+ )
603
+
604
+ with gr.Column(scale=1):
605
+ ai_figure = gr.Image(
606
+ label="Visualisation",
607
+ height=350,
608
+ )
609
+ ai_table = gr.Dataframe(
610
+ label="Data Table",
611
+ interactive=False,
612
+ )
613
+
614
+ user_input.submit(
615
+ ai_chat,
616
+ inputs=[user_input, chatbot],
617
+ outputs=[chatbot, user_input, ai_figure, ai_table],
618
+ )
619
+
620
+
621
+ demo.launch(css=load_css(), allowed_paths=[str(BASE_DIR)])
background_bottom.png ADDED
background_mid.png ADDED
background_top.png ADDED

Git LFS Details

  • SHA256: 27e963d20dbb7ae88368fb527d475c85ef0de3df63d8f0d7d5e2af7403a5b365
  • Pointer size: 131 Bytes
  • Size of remote file: 726 kB
gitattributes ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ ESCP_BANNER[[:space:]](2).png filter=lfs diff=lfs merge=lfs -text
37
+ background.png filter=lfs diff=lfs merge=lfs -text
38
+ background_top.png filter=lfs diff=lfs merge=lfs -text
pythonanalysis.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio==6.0.0
2
+ pandas>=2.0.0
3
+ numpy>=1.24.0
4
+ matplotlib>=3.7.0
5
+ seaborn>=0.13.0
6
+ statsmodels>=0.14.0
7
+ scikit-learn>=1.3.0
8
+ papermill>=2.5.0
9
+ nbformat>=5.9.0
10
+ pillow>=10.0.0
11
+ requests>=2.31.0
12
+ beautifulsoup4>=4.12.0
13
+ vaderSentiment>=3.3.2
14
+ huggingface_hub>=0.20.0
15
+ textblob>=0.18.0
16
+ faker>=20.0.0
style.css ADDED
@@ -0,0 +1,336 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* --- Target the Gradio app wrapper for backgrounds: solid purple base colour with two stacked images (top image drawn once, mid image tiled vertically), both fetched from the escp/rx12workshoptemplate Space --- */
2
+ gradio-app, /* several candidate wrapper selectors are listed; presumably not all exist in one Gradio version — TODO confirm */
3
+ .gradio-app,
4
+ .main,
5
+ #app,
6
+ [data-testid="app"] {
7
+ background-color: rgb(40,9,109) !important;
8
+ background-image:
9
+ url('https://huggingface.co/spaces/escp/rx12workshoptemplate/resolve/main/background_top.png'),
10
+ url('https://huggingface.co/spaces/escp/rx12workshoptemplate/resolve/main/background_mid.png') !important;
11
+ background-position:
12
+ top center,
13
+ 0 913px !important; /* second layer starts 913px from the top — NOTE(review): fixed offset, while the first layer's height scales with width (100% auto); confirm the seam at other viewport widths */
14
+ background-repeat:
15
+ no-repeat,
16
+ repeat-y !important;
17
+ background-size:
18
+ 100% auto,
19
+ 100% auto !important;
20
+ min-height: 100vh !important;
21
+ }
22
+
23
+ /* --- Fallback on html/body: same purple base colour, no margin or padding, at least one viewport tall --- */
24
+ html, body {
25
+ background-color: rgb(40,9,109) !important;
26
+ margin: 0 !important;
27
+ padding: 0 !important;
28
+ min-height: 100vh !important;
29
+ }
30
+
31
+ /* --- Fixed bottom banner using ::after on body: a 130px-tall strip pinned to the bottom edge of the viewport, spanning its full width --- */
32
+ body::after {
33
+ content: '' !important; /* empty content is still required for the pseudo-element to be generated */
34
+ position: fixed !important;
35
+ bottom: 0 !important;
36
+ left: 0 !important;
37
+ right: 0 !important;
38
+ height: 130px !important;
39
+ background-image: url('https://huggingface.co/spaces/escp/rx12workshoptemplate/resolve/main/background_bottom.png') !important;
40
+ background-size: 100% 100% !important; /* stretches the image to the box; aspect ratio is not preserved */
41
+ background-repeat: no-repeat !important;
42
+ background-position: bottom center !important;
43
+ pointer-events: none !important; /* clicks pass through the banner to whatever lies underneath */
44
+ z-index: 9999 !important; /* high z-index: intended to paint over page content */
45
+ }
46
+
47
+ /* --- Main container: centred, 94% of the viewport width capped at 1400px, transparent so the wrapper background shows through --- */
48
+ .gradio-container {
49
+ max-width: 1400px !important;
50
+ width: 94vw !important;
51
+ margin: 0 auto !important;
52
+ padding-top: 220px !important; /* presumably leaves room for the top background image — TODO confirm */
53
+ padding-bottom: 150px !important; /* presumably keeps the last content clear of the 130px fixed bottom banner */
54
+ background: transparent !important;
55
+ }
56
+
57
+ /* --- Title in ESCP gold: large, extra-bold, centred h1 inside the element with id escp_title (id presumably assigned in the app code — confirm) --- */
58
+ #escp_title h1 {
59
+ color: rgb(242,198,55) !important;
60
+ font-size: 3rem !important;
61
+ font-weight: 800 !important;
62
+ text-align: center !important;
63
+ margin: 0 0 12px 0 !important;
64
+ }
65
+
66
+ /* --- Subtitle: paragraph and emphasised text inside #escp_title, centred, white at 85% opacity --- */
67
+ #escp_title p, #escp_title em {
68
+ color: rgba(255,255,255,0.85) !important;
69
+ text-align: center !important;
70
+ }
71
+
72
+ /* --- Tab bar background: purple strip at 60% opacity with rounded top corners; several selector variants are listed, presumably to cover different Gradio DOM structures — TODO confirm which still match --- */
73
+ .tabs > .tab-nav,
74
+ .tab-nav,
75
+ div[role="tablist"],
76
+ .svelte-tabs > .tab-nav {
77
+ background: rgba(40,9,109,0.6) !important;
78
+ border-radius: 10px 10px 0 0 !important;
79
+ padding: 4px !important;
80
+ }
81
+
82
+ /* --- ALL tab buttons: force white text. Selectors target the tab strip only, so buttons inside tab panels are not turned white-on-white with their background and border removed --- */
83
+ .tabs > .tab-nav button,
84
+ .tab-nav button,
85
+ div[role="tablist"] button,
86
+ button[role="tab"],
87
+ .svelte-tabs > .tab-nav button,
88
+ .tab-nav > button,
89
+ .tabs [role="tablist"] button {
90
+ color: #ffffff !important;
91
+ font-weight: 600 !important;
92
+ border: none !important;
93
+ background: transparent !important;
94
+ padding: 10px 20px !important;
95
+ border-radius: 8px 8px 0 0 !important;
96
+ opacity: 1 !important;
97
+ }
98
+
99
+ /* --- Selected tab: ESCP gold text on a faint white highlight; matched through the .selected class or aria-selected="true" --- */
100
+ .tabs > .tab-nav button.selected,
101
+ .tab-nav button.selected,
102
+ button[role="tab"][aria-selected="true"],
103
+ button[role="tab"].selected,
104
+ div[role="tablist"] button[aria-selected="true"],
105
+ .svelte-tabs button.selected {
106
+ color: rgb(242,198,55) !important;
107
+ background: rgba(255,255,255,0.12) !important;
108
+ }
109
+
110
+ /* --- Unselected tabs: ensure visibility by re-asserting white text at full opacity on any tab button not marked as selected --- */
111
+ .tabs > .tab-nav button:not(.selected),
112
+ .tab-nav button:not(.selected),
113
+ button[role="tab"][aria-selected="false"],
114
+ button[role="tab"]:not(.selected),
115
+ div[role="tablist"] button:not([aria-selected="true"]) {
116
+ color: #ffffff !important;
117
+ opacity: 1 !important;
118
+ }
119
+
120
+ /* --- White card panels with rounded corners. NOTE(review): the gr-* class names look like they come from older Gradio releases — confirm they still match under the pinned gradio==6.0.0 --- */
121
+ .gradio-container .gr-block,
122
+ .gradio-container .gr-box,
123
+ .gradio-container .gr-panel,
124
+ .gradio-container .gr-group {
125
+ background: #ffffff !important;
126
+ border-radius: 10px !important;
127
+ }
128
+
129
+ /* --- Tab content area: near-opaque white panel with rounded bottom corners and 16px inner padding --- */
130
+ .tabitem {
131
+ background: rgba(255,255,255,0.95) !important;
132
+ border-radius: 0 0 10px 10px !important;
133
+ padding: 16px !important;
134
+ }
135
+
136
+ /* --- Inputs: white field with a grey border for text-like controls. Checkbox, radio and range inputs are excluded because the background shorthand would also reset their background-image (e.g. a check mark drawn that way) --- */
137
+ .gradio-container input:not([type="checkbox"]):not([type="radio"]):not([type="range"]),
138
+ .gradio-container textarea,
139
+ .gradio-container select {
140
+ background: #ffffff !important;
141
+ border: 1px solid #d1d5db !important;
142
+ border-radius: 8px !important;
143
+ }
144
+
145
+ /* --- Buttons: shared weight, padding and radius for every non-tab button; ESCP purple fill for primary, white with a purple outline for secondary --- */
146
+ .gradio-container button:not([role="tab"]) {
147
+ font-weight: 600 !important;
148
+ padding: 10px 16px !important;
149
+ border-radius: 10px !important;
150
+ }
151
+
152
+ button.primary {
153
+ background-color: rgb(40,9,109) !important;
154
+ color: #ffffff !important;
155
+ border: none !important;
156
+ }
157
+
158
+ button.primary:hover {
159
+ background-color: rgb(60,20,140) !important; /* lighter purple on hover */
160
+ }
161
+
162
+ button.secondary {
163
+ background-color: #ffffff !important;
164
+ color: rgb(40,9,109) !important;
165
+ border: 2px solid rgb(40,9,109) !important;
166
+ }
167
+
168
+ button.secondary:hover {
169
+ background-color: rgb(240,238,250) !important; /* pale lilac tint on hover */
170
+ }
171
+
172
+ /* --- Dataframes: white rounded background for elements with data-testid="dataframe"; the bare table rule below shrinks text in every table on the page --- */
173
+ [data-testid="dataframe"] {
174
+ background-color: #ffffff !important;
175
+ border-radius: 10px !important;
176
+ }
177
+
178
+ table {
179
+ font-size: 0.85rem !important;
180
+ }
181
+
182
+ /* --- Chatbot (AI Dashboard tab): white rounded panel at least 380px tall, lilac user bubbles, light grey bot bubbles. NOTE(review): .gr-chatbot looks like a class name from older Gradio releases — confirm it matches under the pinned gradio==6.0.0 --- */
183
+ .gr-chatbot {
184
+ min-height: 380px !important;
185
+ background-color: #ffffff !important;
186
+ border-radius: 12px !important;
187
+ }
188
+
189
+ .gr-chatbot .message.user {
190
+ background-color: rgb(232,225,250) !important;
191
+ border-radius: 12px !important;
192
+ }
193
+
194
+ .gr-chatbot .message.bot {
195
+ background-color: #f3f4f6 !important;
196
+ border-radius: 12px !important;
197
+ }
198
+
199
+ /* --- Gallery: white background with rounded corners --- */
200
+ .gallery {
201
+ background: #ffffff !important;
202
+ border-radius: 10px !important;
203
+ }
204
+
205
+ /* --- Log textbox: small monospace text. NOTE(review): the bare textarea selector applies to every textarea on the page, not only the log box --- */
206
+ textarea {
207
+ font-family: monospace !important;
208
+ font-size: 0.8rem !important;
209
+ }
210
+
211
+ /* --- Markdown headings inside tabs: h3 in bold ESCP purple, h4 in dark grey --- */
212
+ .tabitem h3 {
213
+ color: rgb(40,9,109) !important;
214
+ font-weight: 700 !important;
215
+ }
216
+
217
+ .tabitem h4 {
218
+ color: #374151 !important;
219
+ }
220
+
221
+ /* --- Examples row (AI Dashboard): pale lilac buttons with purple text and a thin purple border; slightly darker fill on hover --- */
222
+ .examples-row button {
223
+ background: rgb(240,238,250) !important;
224
+ color: rgb(40,9,109) !important;
225
+ border: 1px solid rgb(40,9,109) !important;
226
+ border-radius: 8px !important;
227
+ font-size: 0.85rem !important;
228
+ }
229
+
230
+ .examples-row button:hover {
231
+ background: rgb(232,225,250) !important;
232
+ }
233
+
234
+ /* --- Header / footer: transparent over banner. Strips backgrounds and shadows from header/footer and from anything whose id, class or data-testid contains footer, chip, pill, api, settings, bottom, toolbar or controls --- */
235
+ header, header *,
236
+ footer, footer * {
237
+ background: transparent !important;
238
+ box-shadow: none !important;
239
+ }
240
+
241
+ footer a, footer button,
242
+ header a, header button {
243
+ background: transparent !important;
244
+ border: none !important;
245
+ box-shadow: none !important;
246
+ }
247
+
248
+ #footer, #footer *,
249
+ [class*="footer"], [class*="footer"] *,
250
+ [class*="chip"], [class*="pill"], [class*="chip"] *, [class*="pill"] * { /* NOTE(review): [class*=...] is a substring match and is not limited to the footer — confirm it does not hit unrelated components */
251
+ background: transparent !important;
252
+ border: none !important;
253
+ box-shadow: none !important;
254
+ }
255
+
256
+ [data-testid*="api"], [data-testid*="settings"],
257
+ [id*="api"], [id*="settings"],
258
+ [class*="api"], [class*="settings"],
259
+ [class*="bottom"], [class*="toolbar"], [class*="controls"] { /* NOTE(review): substring matches apply page-wide, e.g. to any class name that merely contains "bottom" or "controls" */
260
+ background: transparent !important;
261
+ box-shadow: none !important;
262
+ }
263
+
264
+ [data-testid*="api"] *, [data-testid*="settings"] *,
265
+ [id*="api"] *, [id*="settings"] *,
266
+ [class*="api"] *, [class*="settings"] * { /* same reset applied to every descendant of the matched elements */
267
+ background: transparent !important;
268
+ box-shadow: none !important;
269
+ }
270
+
271
+ section footer { /* footer resets restated with more specific selectors; NOTE(review): several rules below repeat the same declarations and could be merged */
272
+ background: transparent !important;
273
+ }
274
+
275
+ section footer button,
276
+ section footer a {
277
+ background: transparent !important;
278
+ background-color: transparent !important; /* redundant: the background shorthand on the previous line already sets background-color */
279
+ border: none !important;
280
+ box-shadow: none !important;
281
+ color: white !important; /* white footer text; the page base colour set elsewhere in this file is dark purple */
282
+ }
283
+
284
+ section footer button:hover,
285
+ section footer button:focus,
286
+ section footer a:hover,
287
+ section footer a:focus { /* keeps hover/focus states from re-introducing a background or shadow */
288
+ background: transparent !important;
289
+ background-color: transparent !important;
290
+ box-shadow: none !important;
291
+ }
292
+
293
+ section footer button,
294
+ section footer button * { /* extends the reset to everything nested inside footer buttons and also clears filters */
295
+ background: transparent !important;
296
+ background-color: transparent !important;
297
+ background-image: none !important;
298
+ box-shadow: none !important;
299
+ filter: none !important;
300
+ }
301
+
302
+ section footer button::before,
303
+ section footer button::after { /* same reset for the buttons' pseudo-elements */
304
+ background: transparent !important;
305
+ background-color: transparent !important;
306
+ background-image: none !important;
307
+ box-shadow: none !important;
308
+ filter: none !important;
309
+ }
310
+
311
+ section footer a,
312
+ section footer a * { /* extends the reset to everything nested inside footer links */
313
+ background: transparent !important;
314
+ background-color: transparent !important;
315
+ box-shadow: none !important;
316
+ }
317
+
318
+ .gradio-container footer button,
319
+ .gradio-container footer button *,
320
+ .gradio-container .footer button,
321
+ .gradio-container .footer button * { /* same reset scoped to .gradio-container, covering both a footer element and a .footer class */
322
+ background: transparent !important;
323
+ background-color: transparent !important;
324
+ background-image: none !important;
325
+ box-shadow: none !important;
326
+ }
327
+
328
+ .gradio-container footer button::before,
329
+ .gradio-container footer button::after,
330
+ .gradio-container .footer button::before,
331
+ .gradio-container .footer button::after { /* and for those buttons' pseudo-elements */
332
+ background: transparent !important;
333
+ background-color: transparent !important;
334
+ background-image: none !important;
335
+ box-shadow: none !important;
336
+ }