Spaces:

ESCP
/

AS2_7_SE21_Return-Risk

Running

App Files Files Community

ceeyyuhhh committed 4 days ago

Commit

5bb4638

verified ·

1 Parent(s): 3b4a320

Delete pythonanalysis.ipynb

Browse files

Files changed (1) hide show

pythonanalysis.ipynb +0 -1046

pythonanalysis.ipynb DELETED Viewed

@@ -1,1046 +0,0 @@
-{
-  "cells": [
-    {
-      "cell_type": "markdown",
-      "id": "85361b58",
-      "metadata": {
-        "id": "85361b58"
-      },
-      "source": [
-        "# Step 2 — Python Analysis / Modeling\n",
-        "\n",
-        "Clean version for the Hugging Face SE21 app template. It creates dashboard artifacts."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 5,
-      "id": "c88b847c",
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "c88b847c",
-        "outputId": "d0c3643a-d491-4746-a55b-35ed016e4fe4"
-      },
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Environment ready.\n",
-            "BASE_PATH: /content\n",
-            "CSV files found:\n",
-            "- /content/Womens Clothing E-Commerce Reviews.csv\n",
-            "- /content/ecommerce_returns_cleaned.csv\n",
-            "Using reviews file: /content/Womens Clothing E-Commerce Reviews.csv\n",
-            "Using returns file: /content/ecommerce_returns_cleaned.csv\n",
-            "Reviews shape: (23486, 10)\n",
-            "Returns shape: (113314, 29)\n",
-            "Reviews columns: ['Clothing ID', 'Age', 'Title', 'Review Text', 'Rating', 'Recommended IND', 'Positive Feedback Count', 'Division Name', 'Department Name', 'Class Name']\n",
-            "Returns columns: ['order_id', 'order_item_id', 'product_id', 'seller_id', 'customer_id', 'order_status', 'order_purchase_timestamp', 'order_delivered_customer_date', 'order_estimated_delivery_date', 'review_score', 'review_comment_title', 'review_comment_message', 'price', 'freight_value', 'total_cost', 'product_category_name', 'product_name_lenght', 'product_description_lenght', 'product_photos_qty', 'product_weight_g', 'product_length_cm', 'product_height_cm', 'product_width_cm', 'has_review_text', 'review_text_length', 'delivery_delay_days', 'negative_keyword_flag', 'synthetic_return_risk', 'likely_return']\n",
-            "Data loaded and cleaned.\n"
-          ]
-        }
-      ],
-      "source": [
-        "# ==================================================\n",
-        "# STEP 2: UNIVERSAL ANALYSIS SETUP\n",
-        "# Works in BOTH Hugging Face Spaces and Google Colab\n",
-        "# ==================================================\n",
-        "\n",
-        "import os\n",
-        "import json\n",
-        "import random\n",
-        "import warnings\n",
-        "from pathlib import Path\n",
-        "\n",
-        "os.environ.setdefault(\"MPLCONFIGDIR\", \"/tmp/matplotlib\")\n",
-        "\n",
-        "import numpy as np\n",
-        "import pandas as pd\n",
-        "import matplotlib.pyplot as plt\n",
-        "\n",
-        "warnings.filterwarnings(\"ignore\")\n",
-        "random.seed(42)\n",
-        "np.random.seed(42)\n",
-        "\n",
-        "# Pick the correct runtime folder automatically.\n",
-        "# Hugging Face Space uses /app. Colab uses /content.\n",
-        "candidate_roots = [Path(\"/app\"), Path(\"/content\"), Path.cwd(), Path(\"/mnt/data\")]\n",
-        "BASE_PATH = None\n",
-        "\n",
-        "for root in candidate_roots:\n",
-        "    if root.exists():\n",
-        "        csvs = []\n",
-        "        for p in root.rglob(\"*.csv\"):\n",
-        "            parts = {part.lower() for part in p.parts}\n",
-        "            if \"sample_data\" in parts:\n",
-        "                continue\n",
-        "            if \"outputs\" in parts or \"figures\" in parts or \"tables\" in parts or \"artifacts\" in parts:\n",
-        "                continue\n",
-        "            csvs.append(p)\n",
-        "        if csvs:\n",
-        "            BASE_PATH = root\n",
-        "            break\n",
-        "\n",
-        "if BASE_PATH is None:\n",
-        "    if Path(\"/app\").exists():\n",
-        "        BASE_PATH = Path(\"/app\")\n",
-        "    elif Path(\"/content\").exists():\n",
-        "        BASE_PATH = Path(\"/content\")\n",
-        "    else:\n",
-        "        BASE_PATH = Path.cwd()\n",
-        "\n",
-        "DATA_PROCESSED = BASE_PATH / \"data_processed\"\n",
-        "\n",
-        "OUTPUTS = BASE_PATH / \"outputs\"\n",
-        "FIGURES = BASE_PATH / \"figures\"\n",
-        "TABLES = BASE_PATH / \"tables\"\n",
-        "ARTIFACTS = BASE_PATH / \"artifacts\"\n",
-        "\n",
-        "# Extra folders because different templates check different places\n",
-        "OUTPUT_FIGURES = OUTPUTS / \"figures\"\n",
-        "OUTPUT_TABLES = OUTPUTS / \"tables\"\n",
-        "ARTIFACT_FIGURES = ARTIFACTS / \"figures\"\n",
-        "ARTIFACT_TABLES = ARTIFACTS / \"tables\"\n",
-        "\n",
-        "ALL_OUTPUT_DIRS = [\n",
-        "    DATA_PROCESSED,\n",
-        "    OUTPUTS,\n",
-        "    FIGURES,\n",
-        "    TABLES,\n",
-        "    ARTIFACTS,\n",
-        "    OUTPUT_FIGURES,\n",
-        "    OUTPUT_TABLES,\n",
-        "    ARTIFACT_FIGURES,\n",
-        "    ARTIFACT_TABLES,\n",
-        "]\n",
-        "\n",
-        "for folder in ALL_OUTPUT_DIRS:\n",
-        "    folder.mkdir(parents=True, exist_ok=True)\n",
-        "\n",
-        "print(\"Environment ready.\")\n",
-        "print(\"BASE_PATH:\", BASE_PATH)\n",
-        "\n",
-        "# Load data created by Step 1 if available.\n",
-        "csv_paths = []\n",
-        "for p in BASE_PATH.rglob(\"*.csv\"):\n",
-        "    parts = {part.lower() for part in p.parts}\n",
-        "    if \"sample_data\" in parts:\n",
-        "        continue\n",
-        "    if \"outputs\" in parts or \"figures\" in parts or \"tables\" in parts or \"artifacts\" in parts:\n",
-        "        continue\n",
-        "    csv_paths.append(p)\n",
-        "\n",
-        "print(\"CSV files found:\")\n",
-        "for p in csv_paths:\n",
-        "    print(\"-\", p)\n",
-        "\n",
-        "def first_existing(paths):\n",
-        "    \"\"\"Return the first candidate in `paths` that exists on disk, as a Path; None if none do.\"\"\"\n",
-        "    hits = (Path(candidate) for candidate in paths if Path(candidate).exists())\n",
-        "    return next(hits, None)\n",
-        "\n",
-        "reviews_path = first_existing([\n",
-        "    DATA_PROCESSED / \"reviews_cleaned.csv\",\n",
-        "    DATA_PROCESSED / \"womens_reviews_cleaned.csv\",\n",
-        "    BASE_PATH / \"Womens Clothing E-Commerce Reviews.csv\",\n",
-        "])\n",
-        "\n",
-        "returns_path = first_existing([\n",
-        "    DATA_PROCESSED / \"returns_input.csv\",\n",
-        "    DATA_PROCESSED / \"returns_cleaned.csv\",\n",
-        "    BASE_PATH / \"ecommerce_returns_cleaned.csv\",\n",
-        "    DATA_PROCESSED / \"synthetic_return_risk.csv\",\n",
-        "])\n",
-        "\n",
-        "# Fallback search.\n",
-        "if reviews_path is None:\n",
-        "    review_matches = [\n",
-        "        p for p in csv_paths\n",
-        "        if (\"clothing\" in p.name.lower()) or (\"review\" in p.name.lower() and \"return\" not in p.name.lower())\n",
-        "    ]\n",
-        "    reviews_path = review_matches[0] if review_matches else None\n",
-        "\n",
-        "if returns_path is None:\n",
-        "    return_matches = [\n",
-        "        p for p in csv_paths\n",
-        "        if \"return\" in p.name.lower()\n",
-        "    ]\n",
-        "    returns_path = return_matches[0] if return_matches else None\n",
-        "\n",
-        "\n",
-        "# Fail fast with a clear message: both files are read unconditionally below, and\n",
-        "# pd.read_csv(None) would otherwise fail with an error that does not name the missing input.\n",
-        "if reviews_path is None:\n",
-        "    raise FileNotFoundError(\"Step 2 could not find the clothing reviews CSV.\")\n",
-        "\n",
-        "if returns_path is None:\n",
-        "    raise FileNotFoundError(\"Step 2 could not find the ecommerce returns CSV.\")\n",
-        "\n",
-        "print(\"Using reviews file:\", reviews_path)\n",
-        "print(\"Using returns file:\", returns_path)\n",
-        "\n",
-        "reviews_df = pd.read_csv(reviews_path).drop(columns=[\"Unnamed: 0\"], errors=\"ignore\")\n",
-        "returns_df = pd.read_csv(returns_path).drop(columns=[\"Unnamed: 0\"], errors=\"ignore\")\n",
-        "\n",
-        "print(\"Reviews shape:\", reviews_df.shape)\n",
-        "print(\"Returns shape:\", returns_df.shape)\n",
-        "print(\"Reviews columns:\", reviews_df.columns.tolist())\n",
-        "print(\"Returns columns:\", returns_df.columns.tolist())\n",
-        "\n",
-        "# Basic cleanup / type safety\n",
-        "for col in [\"Age\", \"Rating\", \"Recommended IND\", \"Positive Feedback Count\"]:\n",
-        "    if col in reviews_df.columns:\n",
-        "        reviews_df[col] = pd.to_numeric(reviews_df[col], errors=\"coerce\")\n",
-        "\n",
-        "if \"Review Text\" in reviews_df.columns:\n",
-        "    reviews_df[\"Review Text\"] = reviews_df[\"Review Text\"].fillna(\"\").astype(str)\n",
-        "\n",
-        "if \"Class Name\" in reviews_df.columns:\n",
-        "    reviews_df[\"Class Name\"] = reviews_df[\"Class Name\"].fillna(\"Unknown\").astype(str)\n",
-        "\n",
-        "for col in [\"review_score\", \"likely_return\", \"price\", \"freight_value\", \"delivery_delay_days\", \"synthetic_return_risk\"]:\n",
-        "    if col in returns_df.columns:\n",
-        "        returns_df[col] = pd.to_numeric(returns_df[col], errors=\"coerce\")\n",
-        "\n",
-        "print(\"Data loaded and cleaned.\")"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 6,
-      "id": "f9eb3801",
-      "metadata": {
-        "id": "f9eb3801"
-      },
-      "outputs": [],
-      "source": [
-        "# ==================================================\n",
-        "# HELPERS: save artifacts everywhere the app may check\n",
-        "# ==================================================\n",
-        "\n",
-        "def safe_write_csv(df, path):\n",
-        "    \"\"\"Write `df` to `path` as CSV; return True on success, print the error and return False otherwise.\"\"\"\n",
-        "    try:\n",
-        "        df.to_csv(path)\n",
-        "    except Exception as err:\n",
-        "        print(f\"Could not save {path}: {err}\")\n",
-        "        return False\n",
-        "    return True\n",
-        "\n",
-        "\n",
-        "def safe_savefig(path):\n",
-        "    \"\"\"Save the current pyplot figure to `path`; return True on success, print the error and return False otherwise.\"\"\"\n",
-        "    try:\n",
-        "        plt.savefig(path, dpi=150, bbox_inches=\"tight\")\n",
-        "    except Exception as err:\n",
-        "        print(f\"Could not save {path}: {err}\")\n",
-        "        return False\n",
-        "    return True\n",
-        "\n",
-        "\n",
-        "def safe_write_text(text, path):\n",
-        "    \"\"\"Write `text` to `path` as UTF-8; return True on success, print the error and return False otherwise.\"\"\"\n",
-        "    try:\n",
-        "        path.write_text(text, encoding=\"utf-8\")\n",
-        "    except Exception as err:\n",
-        "        print(f\"Could not save {path}: {err}\")\n",
-        "        return False\n",
-        "    return True\n",
-        "\n",
-        "\n",
-        "def _save_everywhere(folders, filename, writer, kind):\n",
-        "    \"\"\"Save one artifact into every folder in `folders`.\n",
-        "\n",
-        "    `writer(path)` performs the actual write and returns True on success.\n",
-        "    `kind` (\"table\", \"figure\" or \"text\") is only used in the printed/raised messages.\n",
-        "    Raises RuntimeError when not a single copy could be written.\n",
-        "    \"\"\"\n",
-        "    saved_anywhere = False\n",
-        "\n",
-        "    for folder in folders:\n",
-        "        folder.mkdir(parents=True, exist_ok=True)\n",
-        "        # Call the writer first so every folder is attempted, even after an earlier success.\n",
-        "        saved_anywhere = writer(folder / filename) or saved_anywhere\n",
-        "\n",
-        "    if not saved_anywhere:\n",
-        "        raise RuntimeError(f\"Could not save {kind} {filename}\")\n",
-        "    print(f\"Saved {kind} everywhere: {filename}\")\n",
-        "\n",
-        "\n",
-        "def save_table(df, name):\n",
-        "    \"\"\"Save `df` (a DataFrame, or a Series converted to one) as `<name>.csv` in every table folder.\"\"\"\n",
-        "    if isinstance(df, pd.Series):\n",
-        "        df = df.to_frame()\n",
-        "\n",
-        "    table_folders = [TABLES, OUTPUT_TABLES, OUTPUTS, ARTIFACT_TABLES, ARTIFACTS]\n",
-        "    _save_everywhere(table_folders, f\"{name}.csv\", lambda path: safe_write_csv(df, path), \"table\")\n",
-        "\n",
-        "\n",
-        "def save_figure(name):\n",
-        "    \"\"\"Save the current pyplot figure as `<name>.png` in every figure folder.\"\"\"\n",
-        "    figure_folders = [FIGURES, OUTPUT_FIGURES, OUTPUTS, ARTIFACT_FIGURES, ARTIFACTS]\n",
-        "    _save_everywhere(figure_folders, f\"{name}.png\", safe_savefig, \"figure\")\n",
-        "\n",
-        "\n",
-        "def save_text(text, name):\n",
-        "    \"\"\"Save `text` as `<name>.txt` in every table/output/artifact folder.\"\"\"\n",
-        "    text_folders = [TABLES, OUTPUT_TABLES, OUTPUTS, ARTIFACT_TABLES, ARTIFACTS]\n",
-        "    _save_everywhere(text_folders, f\"{name}.txt\", lambda path: safe_write_text(text, path), \"text\")"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 7,
-      "id": "a99949ac",
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "a99949ac",
-        "outputId": "33b9f5b0-67b0-4a44-8eef-b572cb8f7492"
-      },
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Saved table everywhere: rating_distribution.csv\n",
-            "Saved figure everywhere: rating_distribution.png\n",
-            "Saved table everywhere: recommendation_by_class.csv\n",
-            "Saved figure everywhere: recommendation_by_class.png\n",
-            "Saved table everywhere: average_rating_by_age.csv\n",
-            "Saved figure everywhere: average_rating_by_age.png\n",
-            "Saved table everywhere: negative_keyword_counts.csv\n",
-            "Saved figure everywhere: negative_keyword_counts.png\n",
-            "Saved table everywhere: category_return_rate.csv\n",
-            "Saved figure everywhere: category_return_rate.png\n",
-            "Saved table everywhere: monthly_return_rate.csv\n",
-            "Saved figure everywhere: monthly_return_rate.png\n",
-            "Saved table everywhere: feature_importance.csv\n",
-            "Saved figure everywhere: feature_importance.png\n",
-            "Saved text everywhere: classification_report.txt\n",
-            "Artifact creation section finished.\n"
-          ]
-        }
-      ],
-      "source": [
-        "# ==================================================\n",
-        "# CREATE DASHBOARD ARTIFACTS\n",
-        "# ==================================================\n",
-        "\n",
-        "created_figures = []\n",
-        "created_tables = []\n",
-        "\n",
-        "# 1) Rating distribution\n",
-        "if \"Rating\" in reviews_df.columns:\n",
-        "    rating_distribution = reviews_df[\"Rating\"].dropna().value_counts().sort_index().to_frame(\"count\")\n",
-        "    save_table(rating_distribution, \"rating_distribution\")\n",
-        "    created_tables.append(\"rating_distribution.csv\")\n",
-        "\n",
-        "    plt.figure(figsize=(7, 4))\n",
-        "    plt.bar(rating_distribution.index.astype(str), rating_distribution[\"count\"])\n",
-        "    plt.title(\"Distribution of Customer Ratings\")\n",
-        "    plt.xlabel(\"Rating\")\n",
-        "    plt.ylabel(\"Number of Reviews\")\n",
-        "    plt.tight_layout()\n",
-        "    save_figure(\"rating_distribution\")\n",
-        "    created_figures.append(\"rating_distribution.png\")\n",
-        "    plt.close()\n",
-        "\n",
-        "# 2) Recommendation rate by clothing class\n",
-        "if {\"Class Name\", \"Recommended IND\"}.issubset(reviews_df.columns):\n",
-        "    recommendation_by_class = (\n",
-        "        reviews_df.groupby(\"Class Name\")[\"Recommended IND\"]\n",
-        "        .mean()\n",
-        "        .sort_values(ascending=False)\n",
-        "        .head(10)\n",
-        "        .to_frame(\"recommendation_rate\")\n",
-        "    )\n",
-        "    save_table(recommendation_by_class, \"recommendation_by_class\")\n",
-        "    created_tables.append(\"recommendation_by_class.csv\")\n",
-        "\n",
-        "    plt.figure(figsize=(10, 5))\n",
-        "    plt.bar(recommendation_by_class.index.astype(str), recommendation_by_class[\"recommendation_rate\"])\n",
-        "    plt.title(\"Top 10 Most Recommended Clothing Classes\")\n",
-        "    plt.xlabel(\"Class Name\")\n",
-        "    plt.ylabel(\"Recommendation Rate\")\n",
-        "    plt.xticks(rotation=75)\n",
-        "    plt.tight_layout()\n",
-        "    save_figure(\"recommendation_by_class\")\n",
-        "    created_figures.append(\"recommendation_by_class.png\")\n",
-        "    plt.close()\n",
-        "\n",
-        "# 3) Average rating by age\n",
-        "if {\"Age\", \"Rating\"}.issubset(reviews_df.columns):\n",
-        "    average_rating_by_age = (\n",
-        "        reviews_df.groupby(\"Age\")[\"Rating\"]\n",
-        "        .mean()\n",
-        "        .dropna()\n",
-        "        .to_frame(\"average_rating\")\n",
-        "    )\n",
-        "    save_table(average_rating_by_age, \"average_rating_by_age\")\n",
-        "    created_tables.append(\"average_rating_by_age.csv\")\n",
-        "\n",
-        "    plt.figure(figsize=(10, 4))\n",
-        "    plt.plot(average_rating_by_age.index, average_rating_by_age[\"average_rating\"])\n",
-        "    plt.title(\"Average Rating by Customer Age\")\n",
-        "    plt.xlabel(\"Age\")\n",
-        "    plt.ylabel(\"Average Rating\")\n",
-        "    plt.tight_layout()\n",
-        "    save_figure(\"average_rating_by_age\")\n",
-        "    created_figures.append(\"average_rating_by_age.png\")\n",
-        "    plt.close()\n",
-        "\n",
-        "# 4) Complaint / return-risk keyword counts\n",
-        "review_text_column = None\n",
-        "for candidate in [\"Review Text\", \"review_text\", \"review_comment_message\"]:\n",
-        "    if candidate in reviews_df.columns:\n",
-        "        review_text_column = candidate\n",
-        "        break\n",
-        "\n",
-        "if review_text_column is not None:\n",
-        "    keywords = [\n",
-        "        \"bad\", \"poor\", \"cheap\", \"small\", \"large\", \"tight\", \"loose\",\n",
-        "        \"scratchy\", \"thin\", \"return\", \"returned\", \"disappointed\",\n",
-        "        \"quality\", \"fit\", \"sizing\", \"fabric\", \"uncomfortable\"\n",
-        "    ]\n",
-        "    text_series = reviews_df[review_text_column].fillna(\"\").astype(str).str.lower()\n",
-        "    # Match whole words only: a plain substring search counts \"thin\" inside \"thing\"/\"think\"\n",
-        "    # and \"fit\" inside \"outfit\", which inflates the complaint counts.\n",
-        "    # The keywords are plain lowercase letters, so they need no regex escaping.\n",
-        "    keyword_counts = {\n",
-        "        word: int(text_series.str.contains(rf\"\\b{word}\\b\", regex=True).sum())\n",
-        "        for word in keywords\n",
-        "    }\n",
-        "\n",
-        "    negative_keyword_counts = (\n",
-        "        pd.DataFrame(keyword_counts.items(), columns=[\"keyword\", \"review_count\"])\n",
-        "        .sort_values(\"review_count\", ascending=False)\n",
-        "        .set_index(\"keyword\")\n",
-        "    )\n",
-        "    save_table(negative_keyword_counts, \"negative_keyword_counts\")\n",
-        "    created_tables.append(\"negative_keyword_counts.csv\")\n",
-        "\n",
-        "    # Plot only the ten most frequent keywords; the saved table keeps all of them.\n",
-        "    top_keywords = negative_keyword_counts.head(10)\n",
-        "    plt.figure(figsize=(9, 4))\n",
-        "    plt.bar(top_keywords.index.astype(str), top_keywords[\"review_count\"])\n",
-        "    plt.title(\"Most Common Return-Risk Keywords in Reviews\")\n",
-        "    plt.xlabel(\"Keyword\")\n",
-        "    plt.ylabel(\"Number of Reviews\")\n",
-        "    plt.xticks(rotation=45)\n",
-        "    plt.tight_layout()\n",
-        "    save_figure(\"negative_keyword_counts\")\n",
-        "    created_figures.append(\"negative_keyword_counts.png\")\n",
-        "    plt.close()\n",
-        "\n",
-        "# 5) Product category return rate\n",
-        "if {\"product_category_name\", \"likely_return\"}.issubset(returns_df.columns):\n",
-        "    category_return_rate = (\n",
-        "        returns_df.groupby(\"product_category_name\")[\"likely_return\"]\n",
-        "        .mean()\n",
-        "        .sort_values(ascending=False)\n",
-        "        .head(15)\n",
-        "        .to_frame(\"return_rate\")\n",
-        "    )\n",
-        "    save_table(category_return_rate, \"category_return_rate\")\n",
-        "    created_tables.append(\"category_return_rate.csv\")\n",
-        "\n",
-        "    plt.figure(figsize=(11, 5))\n",
-        "    plt.bar(category_return_rate.index.astype(str), category_return_rate[\"return_rate\"])\n",
-        "    plt.title(\"Top Product Categories by Estimated Return Rate\")\n",
-        "    plt.xlabel(\"Product Category\")\n",
-        "    plt.ylabel(\"Return Rate\")\n",
-        "    plt.xticks(rotation=75)\n",
-        "    plt.tight_layout()\n",
-        "    save_figure(\"category_return_rate\")\n",
-        "    created_figures.append(\"category_return_rate.png\")\n",
-        "    plt.close()\n",
-        "\n",
-        "# 6) Monthly return rate\n",
-        "if {\"order_purchase_timestamp\", \"likely_return\"}.issubset(returns_df.columns):\n",
-        "    monthly_df = returns_df.copy()\n",
-        "    monthly_df[\"order_purchase_timestamp\"] = pd.to_datetime(monthly_df[\"order_purchase_timestamp\"], errors=\"coerce\")\n",
-        "    monthly_df = monthly_df.dropna(subset=[\"order_purchase_timestamp\"])\n",
-        "\n",
-        "    if len(monthly_df) > 0:\n",
-        "        monthly_return_rate = (\n",
-        "            monthly_df.set_index(\"order_purchase_timestamp\")\n",
-        "            .resample(\"M\")[\"likely_return\"]\n",
-        "            .mean()\n",
-        "            .dropna()\n",
-        "            .to_frame(\"return_rate\")\n",
-        "        )\n",
-        "        save_table(monthly_return_rate, \"monthly_return_rate\")\n",
-        "        created_tables.append(\"monthly_return_rate.csv\")\n",
-        "\n",
-        "        plt.figure(figsize=(10, 4))\n",
-        "        plt.plot(monthly_return_rate.index, monthly_return_rate[\"return_rate\"])\n",
-        "        plt.title(\"Monthly Estimated Return Rate\")\n",
-        "        plt.xlabel(\"Month\")\n",
-        "        plt.ylabel(\"Return Rate\")\n",
-        "        plt.tight_layout()\n",
-        "        save_figure(\"monthly_return_rate\")\n",
-        "        created_figures.append(\"monthly_return_rate.png\")\n",
-        "        plt.close()\n",
-        "\n",
-        "# 7) Simple feature importance if sklearn is available\n",
-        "try:\n",
-        "    from sklearn.ensemble import RandomForestClassifier\n",
-        "    from sklearn.model_selection import train_test_split\n",
-        "    from sklearn.metrics import accuracy_score, classification_report\n",
-        "\n",
-        "    feature_columns = [c for c in [\"Age\", \"Rating\", \"Positive Feedback Count\"] if c in reviews_df.columns]\n",
-        "    if \"Recommended IND\" in reviews_df.columns and len(feature_columns) > 0:\n",
-        "        model_df = reviews_df[feature_columns + [\"Recommended IND\"]].dropna().copy()\n",
-        "        if model_df[\"Recommended IND\"].nunique() >= 2:\n",
-        "            X = model_df[feature_columns]\n",
-        "            y = model_df[\"Recommended IND\"].astype(int)\n",
-        "            X_train, X_test, y_train, y_test = train_test_split(\n",
-        "                X, y, test_size=0.2, random_state=42, stratify=y\n",
-        "            )\n",
-        "\n",
-        "            clf = RandomForestClassifier(n_estimators=100, random_state=42)\n",
-        "            clf.fit(X_train, y_train)\n",
-        "            predictions = clf.predict(X_test)\n",
-        "            accuracy = accuracy_score(y_test, predictions)\n",
-        "\n",
-        "            feature_importance = (\n",
-        "                pd.Series(clf.feature_importances_, index=feature_columns)\n",
-        "                .sort_values(ascending=False)\n",
-        "                .to_frame(\"importance\")\n",
-        "            )\n",
-        "            save_table(feature_importance, \"feature_importance\")\n",
-        "            created_tables.append(\"feature_importance.csv\")\n",
-        "\n",
-        "            plt.figure(figsize=(7, 4))\n",
-        "            plt.bar(feature_importance.index.astype(str), feature_importance[\"importance\"])\n",
-        "            plt.title(\"Feature Importance for Recommendation Prediction\")\n",
-        "            plt.xlabel(\"Feature\")\n",
-        "            plt.ylabel(\"Importance\")\n",
-        "            plt.tight_layout()\n",
-        "            save_figure(\"feature_importance\")\n",
-        "            created_figures.append(\"feature_importance.png\")\n",
-        "            plt.close()\n",
-        "\n",
-        "            report = \"Model accuracy: {:.4f}\\n\\n{}\".format(\n",
-        "                accuracy,\n",
-        "                classification_report(y_test, predictions)\n",
-        "            )\n",
-        "            save_text(report, \"classification_report\")\n",
-        "except Exception as e:\n",
-        "    print(\"ML section skipped:\", repr(e))\n",
-        "\n",
-        "print(\"Artifact creation section finished.\")"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 8,
-      "id": "c4bbc916",
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "c4bbc916",
-        "outputId": "1dc63b01-ed81-47cd-cf56-3e193b2f87f2"
-      },
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Saved table everywhere: dashboard_summary.csv\n",
-            "Saved text everywhere: business_insights_report.txt\n",
-            "STEP 2 COMPLETE.\n",
-            "Figures: ['average_rating_by_age.png', 'category_return_rate.png', 'feature_importance.png', 'monthly_return_rate.png', 'negative_keyword_counts.png', 'rating_distribution.png', 'recommendation_by_class.png']\n",
-            "Tables: ['average_rating_by_age.csv', 'category_return_rate.csv', 'dashboard_summary.csv', 'feature_importance.csv', 'monthly_return_rate.csv', 'negative_keyword_counts.csv', 'rating_distribution.csv', 'recommendation_by_class.csv']\n",
-            "Outputs: ['average_rating_by_age.csv', 'average_rating_by_age.png', 'business_insights_report.txt', 'category_return_rate.csv', 'category_return_rate.png', 'classification_report.txt', 'dashboard_summary.csv', 'feature_importance.csv', 'feature_importance.png', 'monthly_return_rate.csv', 'monthly_return_rate.png', 'negative_keyword_counts.csv', 'negative_keyword_counts.png', 'rating_distribution.csv', 'rating_distribution.png', 'recommendation_by_class.csv', 'recommendation_by_class.png']\n"
-          ]
-        }
-      ],
-      "source": [
-        "# ==================================================\n",
-        "# FINAL REPORT + MANIFEST\n",
-        "# ==================================================\n",
-        "\n",
-        "# Count what this run produced instead of globbing the folders: files left over from an\n",
-        "# earlier run (e.g. dashboard_summary.csv itself) would otherwise change the numbers on re-run.\n",
-        "summary_rows = [\n",
-        "    {\"metric\": \"reviews_rows\", \"value\": int(len(reviews_df))},\n",
-        "    {\"metric\": \"returns_rows\", \"value\": int(len(returns_df))},\n",
-        "    {\"metric\": \"figures_created\", \"value\": int(len(created_figures))},\n",
-        "    {\"metric\": \"tables_created\", \"value\": int(len(created_tables))},\n",
-        "]\n",
-        "\n",
-        "summary_df = pd.DataFrame(summary_rows).set_index(\"metric\")\n",
-        "save_table(summary_df, \"dashboard_summary\")\n",
-        "\n",
-        "insights = \"\"\"\n",
-        "FINAL BUSINESS INSIGHTS\n",
-        "=======================\n",
-        "\n",
-        "This analysis supports an e-commerce return prediction and review intelligence assistant.\n",
-        "\n",
-        "Main findings:\n",
-        "- Customer ratings and recommendation behavior are useful signals for product satisfaction.\n",
-        "- Review text reveals return-risk themes such as fit, sizing, fabric, quality, and discomfort.\n",
-        "- Product categories with higher estimated return rates should be prioritized for improvement.\n",
-        "- Monthly return-rate tracking can help the business monitor operational or seasonal changes.\n",
-        "\n",
-        "Recommended automations:\n",
-        "1. Automatically scan new reviews for return-risk keywords.\n",
-        "2. Automatically rank products and categories by estimated return risk.\n",
-        "3. Automatically generate business recommendations for product pages, sizing guidance, and quality control.\n",
-        "\"\"\"\n",
-        "\n",
-        "save_text(insights, \"business_insights_report\")\n",
-        "\n",
-        "manifest = {\n",
-        "    \"base_path\": str(BASE_PATH),\n",
-        "    \"figures\": sorted([p.name for p in FIGURES.glob(\"*.png\")]),\n",
-        "    \"tables\": sorted([p.name for p in TABLES.glob(\"*.csv\")]),\n",
-        "    \"outputs\": sorted([p.name for p in OUTPUTS.iterdir() if p.is_file()]),\n",
-        "}\n",
-        "\n",
-        "for folder in [OUTPUTS, ARTIFACTS, TABLES]:\n",
-        "    try:\n",
-        "        with open(folder / \"artifacts_manifest.json\", \"w\", encoding=\"utf-8\") as f:\n",
-        "            json.dump(manifest, f, indent=2)\n",
-        "    except Exception as e:\n",
-        "        print(f\"Could not save manifest in {folder}: {e}\")\n",
-        "\n",
-        "print(\"STEP 2 COMPLETE.\")\n",
-        "print(\"Figures:\", manifest[\"figures\"])\n",
-        "print(\"Tables:\", manifest[\"tables\"])\n",
-        "print(\"Outputs:\", manifest[\"outputs\"])"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "print(\"\\nFINAL ARTIFACT CHECK\")\n",
-        "\n",
-        "# Label -> folder for every location an artifact may have been written to.\n",
-        "check_dirs = dict(\n",
-        "    FIGURES=FIGURES,\n",
-        "    TABLES=TABLES,\n",
-        "    OUTPUTS=OUTPUTS,\n",
-        "    OUTPUT_FIGURES=OUTPUT_FIGURES,\n",
-        "    OUTPUT_TABLES=OUTPUT_TABLES,\n",
-        "    ARTIFACTS=ARTIFACTS,\n",
-        "    ARTIFACT_FIGURES=ARTIFACT_FIGURES,\n",
-        "    ARTIFACT_TABLES=ARTIFACT_TABLES,\n",
-        ")\n",
-        "\n",
-        "# List only regular files; sub-folders such as outputs/figures are reported under their own label.\n",
-        "for label, folder in check_dirs.items():\n",
-        "    files = sorted(entry.name for entry in folder.iterdir() if entry.is_file())\n",
-        "    print(label, \"=\", files)"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "fexa62gDM2c7",
-        "outputId": "e84626f3-e126-43f8-a408-665ccd7eb914"
-      },
-      "id": "fexa62gDM2c7",
-      "execution_count": 9,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "\n",
-            "FINAL ARTIFACT CHECK\n",
-            "FIGURES = ['average_rating_by_age.png', 'category_return_rate.png', 'feature_importance.png', 'monthly_return_rate.png', 'negative_keyword_counts.png', 'rating_distribution.png', 'recommendation_by_class.png']\n",
-            "TABLES = ['artifacts_manifest.json', 'average_rating_by_age.csv', 'business_insights_report.txt', 'category_return_rate.csv', 'classification_report.txt', 'dashboard_summary.csv', 'feature_importance.csv', 'monthly_return_rate.csv', 'negative_keyword_counts.csv', 'rating_distribution.csv', 'recommendation_by_class.csv']\n",
-            "OUTPUTS = ['artifacts_manifest.json', 'average_rating_by_age.csv', 'average_rating_by_age.png', 'business_insights_report.txt', 'category_return_rate.csv', 'category_return_rate.png', 'classification_report.txt', 'dashboard_summary.csv', 'feature_importance.csv', 'feature_importance.png', 'monthly_return_rate.csv', 'monthly_return_rate.png', 'negative_keyword_counts.csv', 'negative_keyword_counts.png', 'rating_distribution.csv', 'rating_distribution.png', 'recommendation_by_class.csv', 'recommendation_by_class.png']\n",
-            "OUTPUT_FIGURES = ['average_rating_by_age.png', 'category_return_rate.png', 'feature_importance.png', 'monthly_return_rate.png', 'negative_keyword_counts.png', 'rating_distribution.png', 'recommendation_by_class.png']\n",
-            "OUTPUT_TABLES = ['average_rating_by_age.csv', 'business_insights_report.txt', 'category_return_rate.csv', 'classification_report.txt', 'dashboard_summary.csv', 'feature_importance.csv', 'monthly_return_rate.csv', 'negative_keyword_counts.csv', 'rating_distribution.csv', 'recommendation_by_class.csv']\n",
-            "ARTIFACTS = ['artifacts_manifest.json', 'average_rating_by_age.csv', 'average_rating_by_age.png', 'business_insights_report.txt', 'category_return_rate.csv', 'category_return_rate.png', 'classification_report.txt', 'dashboard_summary.csv', 'feature_importance.csv', 'feature_importance.png', 'monthly_return_rate.csv', 'monthly_return_rate.png', 'negative_keyword_counts.csv', 'negative_keyword_counts.png', 'rating_distribution.csv', 'rating_distribution.png', 'recommendation_by_class.csv', 'recommendation_by_class.png']\n",
-            "ARTIFACT_FIGURES = ['average_rating_by_age.png', 'category_return_rate.png', 'feature_importance.png', 'monthly_return_rate.png', 'negative_keyword_counts.png', 'rating_distribution.png', 'recommendation_by_class.png']\n",
-            "ARTIFACT_TABLES = ['average_rating_by_age.csv', 'business_insights_report.txt', 'category_return_rate.csv', 'classification_report.txt', 'dashboard_summary.csv', 'feature_importance.csv', 'monthly_return_rate.csv', 'negative_keyword_counts.csv', 'rating_distribution.csv', 'recommendation_by_class.csv']\n"
-          ]
-        }
-      ]
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "# ==================================================\n",
-        "# FORCE DASHBOARD ARTIFACTS FOR SE21 HUGGING FACE APP\n",
-        "# Put this as the VERY LAST CELL of pythonanalysis.ipynb\n",
-        "# ==================================================\n",
-        "\n",
-        "import os\n",
-        "import json\n",
-        "from pathlib import Path\n",
-        "\n",
-        "import pandas as pd\n",
-        "import numpy as np\n",
-        "\n",
-        "import matplotlib\n",
-        "matplotlib.use(\"Agg\")\n",
-        "import matplotlib.pyplot as plt\n",
-        "\n",
-        "# Detect runtime\n",
-        "if Path(\"/app\").exists():\n",
-        "    BASE_PATH = Path(\"/app\")\n",
-        "elif Path(\"/content\").exists():\n",
-        "    BASE_PATH = Path(\"/content\")\n",
-        "else:\n",
-        "    BASE_PATH = Path.cwd()\n",
-        "\n",
-        "# THESE ARE THE EXACT FOLDERS app.py READS\n",
-        "PY_FIG_DIR = BASE_PATH / \"artifacts\" / \"py\" / \"figures\"\n",
-        "PY_TAB_DIR = BASE_PATH / \"artifacts\" / \"py\" / \"tables\"\n",
-        "\n",
-        "PY_FIG_DIR.mkdir(parents=True, exist_ok=True)\n",
-        "PY_TAB_DIR.mkdir(parents=True, exist_ok=True)\n",
-        "\n",
-        "print(\"Saving dashboard artifacts to:\")\n",
-        "print(\"Figures:\", PY_FIG_DIR)\n",
-        "print(\"Tables:\", PY_TAB_DIR)\n",
-        "\n",
-        "# Find candidate input CSVs under BASE_PATH, skipping generated/sample folders.\n",
-        "# The exclusion is matched against directory names relative to BASE_PATH rather\n",
-        "# than as a substring of the full path, so a BASE_PATH that itself contains one\n",
-        "# of these words (e.g. a working directory named \"outputs\") does not hide every\n",
-        "# file, and a data file whose own name contains one is still found.\n",
-        "EXCLUDED_DIR_NAMES = {\"sample_data\", \"artifacts\", \"outputs\", \"figures\", \"tables\"}\n",
-        "\n",
-        "# Sorted so the first-match fallbacks below do not depend on filesystem order.\n",
-        "csv_paths = sorted(\n",
-        "    p for p in BASE_PATH.rglob(\"*.csv\")\n",
-        "    if EXCLUDED_DIR_NAMES.isdisjoint(p.relative_to(BASE_PATH).parts[:-1])\n",
-        ")\n",
-        "\n",
-        "print(\"CSV files found:\")\n",
-        "for p in csv_paths:\n",
-        "    print(\"-\", p)\n",
-        "\n",
-        "# Locate the two input datasets: known locations first, then a filename\n",
-        "# keyword search over the CSVs discovered above.\n",
-        "def _pick_dataset(preferred, keywords):\n",
-        "    \"\"\"Return the first existing path in `preferred`; otherwise the first entry of\n",
-        "    csv_paths whose lowercased file name contains any of `keywords`; else None.\"\"\"\n",
-        "    for path in preferred:\n",
-        "        if path.exists():\n",
-        "            return path\n",
-        "    for path in csv_paths:\n",
-        "        lowered = path.name.lower()\n",
-        "        if any(word in lowered for word in keywords):\n",
-        "            return path\n",
-        "    return None\n",
-        "\n",
-        "\n",
-        "# Find reviews dataset\n",
-        "reviews_candidates = [\n",
-        "    BASE_PATH / \"data_processed\" / \"reviews_cleaned.csv\",\n",
-        "    BASE_PATH / \"Womens Clothing E-Commerce Reviews.csv\",\n",
-        "]\n",
-        "reviews_path = _pick_dataset(reviews_candidates, (\"clothing\", \"review\"))\n",
-        "\n",
-        "# Find returns dataset\n",
-        "returns_candidates = [\n",
-        "    BASE_PATH / \"data_processed\" / \"returns_input.csv\",\n",
-        "    BASE_PATH / \"data_processed\" / \"returns_cleaned.csv\",\n",
-        "    BASE_PATH / \"ecommerce_returns_cleaned.csv\",\n",
-        "    BASE_PATH / \"data_processed\" / \"synthetic_return_risk.csv\",\n",
-        "]\n",
-        "returns_path = _pick_dataset(returns_candidates, (\"return\",))\n",
-        "\n",
-        "# Fail early, reviews first, when either dataset is missing.\n",
-        "if reviews_path is None:\n",
-        "    raise FileNotFoundError(\"Could not find reviews CSV.\")\n",
-        "\n",
-        "if returns_path is None:\n",
-        "    raise FileNotFoundError(\"Could not find returns CSV.\")\n",
-        "\n",
-        "print(\"Using reviews:\", reviews_path)\n",
-        "print(\"Using returns:\", returns_path)\n",
-        "\n",
-        "reviews_df = pd.read_csv(reviews_path).drop(columns=[\"Unnamed: 0\"], errors=\"ignore\")\n",
-        "returns_df = pd.read_csv(returns_path).drop(columns=[\"Unnamed: 0\"], errors=\"ignore\")\n",
-        "\n",
-        "print(\"Reviews shape:\", reviews_df.shape)\n",
-        "print(\"Returns shape:\", returns_df.shape)\n",
-        "\n",
-        "# --------------------------------------------------\n",
-        "# 1. Rating distribution\n",
-        "# --------------------------------------------------\n",
-        "if \"Rating\" in reviews_df.columns:\n",
-        "    # Number of reviews per rating value (missing ratings dropped), ordered by rating.\n",
-        "    rating_counts = reviews_df[\"Rating\"].dropna().value_counts().sort_index()\n",
-        "    rating_distribution = rating_counts.reset_index()\n",
-        "    rating_distribution.columns = [\"rating\", \"count\"]\n",
-        "\n",
-        "    rating_distribution.to_csv(PY_TAB_DIR / \"rating_distribution.csv\", index=False)\n",
-        "\n",
-        "    # Bar chart of the same table; ratings are plotted as categorical labels.\n",
-        "    fig, ax = plt.subplots(figsize=(7, 4))\n",
-        "    ax.bar(rating_distribution[\"rating\"].astype(str), rating_distribution[\"count\"])\n",
-        "    ax.set_title(\"Distribution of Customer Ratings\")\n",
-        "    ax.set_xlabel(\"Rating\")\n",
-        "    ax.set_ylabel(\"Number of Reviews\")\n",
-        "    fig.tight_layout()\n",
-        "    fig.savefig(PY_FIG_DIR / \"rating_distribution.png\", dpi=150, bbox_inches=\"tight\")\n",
-        "    plt.close(fig)\n",
-        "\n",
-        "# --------------------------------------------------\n",
-        "# 2. Sentiment counts for app's sentiment chart\n",
-        "# The app specifically looks for sentiment_counts_sampled.csv\n",
-        "# --------------------------------------------------\n",
-        "if \"Rating\" in reviews_df.columns:\n",
-        "    temp = reviews_df.copy()\n",
-        "\n",
-        "    def rating_to_sentiment(r):\n",
-        "        try:\n",
-        "            r = float(r)\n",
-        "            if r <= 2:\n",
-        "                return \"negative\"\n",
-        "            elif r == 3:\n",
-        "                return \"neutral\"\n",
-        "            else:\n",
-        "                return \"positive\"\n",
-        "        except:\n",
-        "            return \"neutral\"\n",
-        "\n",
-        "    temp[\"sentiment\"] = temp[\"Rating\"].apply(rating_to_sentiment)\n",
-        "\n",
-        "    group_col = \"Class Name\" if \"Class Name\" in temp.columns else None\n",
-        "\n",
-        "    if group_col:\n",
-        "        sentiment_counts = (\n",
-        "            temp.groupby([group_col, \"sentiment\"])\n",
-        "            .size()\n",
-        "            .unstack(fill_value=0)\n",
-        "            .reset_index()\n",
-        "            .head(15)\n",
-        "        )\n",
-        "        sentiment_counts = sentiment_counts.rename(columns={group_col: \"title\"})\n",
-        "    else:\n",
-        "        sentiment_counts = (\n",
-        "            temp[\"sentiment\"]\n",
-        "            .value_counts()\n",
-        "            .to_frame()\n",
-        "            .T\n",
-        "            .reset_index(drop=True)\n",
-        "        )\n",
-        "        sentiment_counts.insert(0, \"title\", \"All Reviews\")\n",
-        "\n",
-        "    for col in [\"negative\", \"neutral\", \"positive\"]:\n",
-        "        if col not in sentiment_counts.columns:\n",
-        "            sentiment_counts[col] = 0\n",
-        "\n",
-        "    sentiment_counts[[\"title\", \"negative\", \"neutral\", \"positive\"]].to_csv(\n",
-        "        PY_TAB_DIR / \"sentiment_counts_sampled.csv\",\n",
-        "        index=False\n",
-        "    )\n",
-        "\n",
-        "    # Also save a normal figure\n",
-        "    sentiment_total = temp[\"sentiment\"].value_counts().reindex(\n",
-        "        [\"negative\", \"neutral\", \"positive\"],\n",
-        "        fill_value=0\n",
-        "    )\n",
-        "\n",
-        "    plt.figure(figsize=(7, 4))\n",
-        "    plt.bar(sentiment_total.index, sentiment_total.values)\n",
-        "    plt.title(\"Review Sentiment Distribution\")\n",
-        "    plt.xlabel(\"Sentiment\")\n",
-        "    plt.ylabel(\"Number of Reviews\")\n",
-        "    plt.tight_layout()\n",
-        "    plt.savefig(PY_FIG_DIR / \"sentiment_distribution.png\", dpi=150, bbox_inches=\"tight\")\n",
-        "    plt.close()\n",
-        "\n",
-        "# --------------------------------------------------\n",
-        "# 3. Category return rate\n",
-        "# --------------------------------------------------\n",
-        "# For each role the first listed column present in returns_df wins; None if none is.\n",
-        "return_col = next(\n",
-        "    (\n",
-        "        name\n",
-        "        for name in (\"likely_return\", \"synthetic_return_risk\", \"returned\", \"return_flag\")\n",
-        "        if name in returns_df.columns\n",
-        "    ),\n",
-        "    None,\n",
-        ")\n",
-        "\n",
-        "category_col = next(\n",
-        "    (\n",
-        "        name\n",
-        "        for name in (\"product_category_name\", \"category\", \"Class Name\", \"product_id\")\n",
-        "        if name in returns_df.columns\n",
-        "    ),\n",
-        "    None,\n",
-        ")\n",
-        "\n",
-        "# Coerce the chosen return column to numeric; unparseable values become NaN.\n",
-        "if return_col is not None:\n",
-        "    returns_df[return_col] = pd.to_numeric(returns_df[return_col], errors=\"coerce\")\n",
-        "\n",
-        "if return_col is not None and category_col is not None:\n",
-        "    category_return_rate = (\n",
-        "        returns_df.groupby(category_col)[return_col]\n",
-        "        .mean()\n",
-        "        .sort_values(ascending=False)\n",
-        "        .head(15)\n",
-        "        .reset_index()\n",
-        "    )\n",
-        "    category_return_rate.columns = [\"category\", \"return_rate\"]\n",
-        "\n",
-        "    category_return_rate.to_csv(PY_TAB_DIR / \"category_return_rate.csv\", index=False)\n",
-        "\n",
-        "    plt.figure(figsize=(11, 5))\n",
-        "    plt.bar(category_return_rate[\"category\"].astype(str), category_return_rate[\"return_rate\"])\n",
-        "    plt.title(\"Highest Return-Rate Categories\")\n",
-        "    plt.xlabel(\"Category\")\n",
-        "    plt.ylabel(\"Return Rate\")\n",
-        "    plt.xticks(rotation=75)\n",
-        "    plt.tight_layout()\n",
-        "    plt.savefig(PY_FIG_DIR / \"category_return_rate.png\", dpi=150, bbox_inches=\"tight\")\n",
-        "    plt.close()\n",
-        "\n",
-        "    # The template's AI fallback weirdly expects this filename for \"top\" questions.\n",
-        "    # We reuse it to show highest return-risk categories.\n",
-        "    top_titles_by_units_sold = category_return_rate.copy()\n",
-        "    top_titles_by_units_sold.columns = [\"title\", \"units_sold\"]\n",
-        "    top_titles_by_units_sold.to_csv(PY_TAB_DIR / \"top_titles_by_units_sold.csv\", index=False)\n",
-        "\n",
-        "# --------------------------------------------------\n",
-        "# 4. Dashboard time-series file\n",
-        "# The app's dashboard chart specifically looks for df_dashboard.csv\n",
-        "# --------------------------------------------------\n",
-        "# Monthly aggregate of the return column; stays None when it cannot be built.\n",
-        "monthly_returns = None\n",
-        "\n",
-        "if return_col is not None and \"order_purchase_timestamp\" in returns_df.columns:\n",
-        "    purchase_time = pd.to_datetime(\n",
-        "        returns_df[\"order_purchase_timestamp\"],\n",
-        "        errors=\"coerce\"\n",
-        "    )\n",
-        "    # Rows without a parseable purchase timestamp are dropped.\n",
-        "    ts = returns_df.assign(order_purchase_timestamp=purchase_time).dropna(\n",
-        "        subset=[\"order_purchase_timestamp\"]\n",
-        "    )\n",
-        "\n",
-        "    if not ts.empty:\n",
-        "        monthly_groups = ts.set_index(\"order_purchase_timestamp\").resample(\"M\")\n",
-        "        monthly_returns = (\n",
-        "            monthly_groups.agg(\n",
-        "                return_rate=(return_col, \"mean\"),\n",
-        "                orders=(return_col, \"count\")\n",
-        "            )\n",
-        "            .reset_index()\n",
-        "            .rename(columns={\"order_purchase_timestamp\": \"month\"})\n",
-        "        )\n",
-        "\n",
-        "if monthly_returns is None:\n",
-        "    # Placeholder of three zero-valued months so the CSV and chart are still written.\n",
-        "    monthly_returns = pd.DataFrame({\n",
-        "        \"month\": pd.date_range(\"2024-01-01\", periods=3, freq=\"M\"),\n",
-        "        \"return_rate\": [0, 0, 0],\n",
-        "        \"orders\": [0, 0, 0],\n",
-        "    })\n",
-        "\n",
-        "dashboard_df = monthly_returns\n",
-        "dashboard_df.to_csv(PY_TAB_DIR / \"df_dashboard.csv\", index=False)\n",
-        "\n",
-        "# Line chart of the monthly series.\n",
-        "fig, ax = plt.subplots(figsize=(9, 4))\n",
-        "ax.plot(pd.to_datetime(dashboard_df[\"month\"]), dashboard_df[\"return_rate\"], marker=\"o\")\n",
-        "ax.set_title(\"Monthly Estimated Return Rate\")\n",
-        "ax.set_xlabel(\"Month\")\n",
-        "ax.set_ylabel(\"Return Rate\")\n",
-        "fig.tight_layout()\n",
-        "fig.savefig(PY_FIG_DIR / \"monthly_return_rate.png\", dpi=150, bbox_inches=\"tight\")\n",
-        "plt.close(fig)\n",
-        "\n",
-        "# --------------------------------------------------\n",
-        "# 5. KPIs\n",
-        "# --------------------------------------------------\n",
-        "# Mean of the return column, or None when there is no such column or the mean\n",
-        "# is not finite (e.g. no usable values). json.dump would write a NaN as the\n",
-        "# bare token NaN, which is not valid JSON and is rejected by strict parsers.\n",
-        "estimated_return_rate = None\n",
-        "if return_col is not None:\n",
-        "    mean_return = returns_df[return_col].mean()\n",
-        "    if np.isfinite(mean_return):\n",
-        "        estimated_return_rate = float(mean_return)\n",
-        "\n",
-        "# Headline numbers written to kpis.json.\n",
-        "kpis = {\n",
-        "    \"reviews_rows\": int(len(reviews_df)),\n",
-        "    \"returns_rows\": int(len(returns_df)),\n",
-        "    # Distinct \"Clothing ID\" values when that column exists, else the review row count.\n",
-        "    \"n_titles\": int(reviews_df[\"Clothing ID\"].nunique()) if \"Clothing ID\" in reviews_df.columns else int(len(reviews_df)),\n",
-        "    \"n_months\": int(len(dashboard_df)),\n",
-        "    # Row count of the returns table, stored under the key name used here.\n",
-        "    \"total_units_sold\": int(len(returns_df)),\n",
-        "    \"estimated_return_rate\": estimated_return_rate,\n",
-        "}\n",
-        "\n",
-        "with open(PY_TAB_DIR / \"kpis.json\", \"w\", encoding=\"utf-8\") as f:\n",
-        "    json.dump(kpis, f, indent=2)\n",
-        "\n",
-        "# --------------------------------------------------\n",
-        "# Final verification\n",
-        "# --------------------------------------------------\n",
-        "print(\"\\nFORCE ARTIFACT CELL RAN SUCCESSFULLY\")\n",
-        "print(\"Figures now in app-readable folder:\")\n",
-        "print(sorted([p.name for p in PY_FIG_DIR.glob(\"*\")]))\n",
-        "\n",
-        "print(\"Tables now in app-readable folder:\")\n",
-        "print(sorted([p.name for p in PY_TAB_DIR.glob(\"*\")]))"
-      ],
-      "metadata": {
-        "id": "G-jXRriWP1TW",
-        "outputId": "23349a23-0bdc-476f-fb72-8e388be9630c",
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        }
-      },
-      "id": "G-jXRriWP1TW",
-      "execution_count": 10,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Saving dashboard artifacts to:\n",
-            "Figures: /content/artifacts/py/figures\n",
-            "Tables: /content/artifacts/py/tables\n",
-            "CSV files found:\n",
-            "- /content/Womens Clothing E-Commerce Reviews.csv\n",
-            "- /content/ecommerce_returns_cleaned.csv\n",
-            "Using reviews: /content/Womens Clothing E-Commerce Reviews.csv\n",
-            "Using returns: /content/ecommerce_returns_cleaned.csv\n",
-            "Reviews shape: (23486, 10)\n",
-            "Returns shape: (113314, 29)\n",
-            "\n",
-            "FORCE ARTIFACT CELL RAN SUCCESSFULLY\n",
-            "Figures now in app-readable folder:\n",
-            "['category_return_rate.png', 'monthly_return_rate.png', 'rating_distribution.png', 'sentiment_distribution.png']\n",
-            "Tables now in app-readable folder:\n",
-            "['category_return_rate.csv', 'df_dashboard.csv', 'kpis.json', 'rating_distribution.csv', 'sentiment_counts_sampled.csv', 'top_titles_by_units_sold.csv']\n"
-          ]
-        }
-      ]
-    }
-  ],
-  "metadata": {
-    "kernelspec": {
-      "display_name": "Python 3",
-      "language": "python",
-      "name": "python3"
-    },
-    "language_info": {
-      "name": "python",
-      "version": "3.10"
-    },
-    "colab": {
-      "provenance": []
-    }
-  },
-  "nbformat": 4,
-  "nbformat_minor": 5
-}