Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import numpy as np | |
| # Fixed non-click columns | |
| NON_CLICK_COLS = {"date", "month", "year", "blogs", "h1", "meta title", | |
| "meta description", "total clicks", "total_clicks"} | |
| TIER_CONFIG = { | |
| "takedown_zero_max": 0, | |
| "takedown_low_max": 5, | |
| "takedown_low_monthly": 2, | |
| "monitor_min": 6, | |
| "monitor_max": 20, | |
| "performing_min": 21, | |
| "performing_max": 100, | |
| "strong_min": 101, | |
| "strong_max": 500, | |
| "top_min": 501, | |
| } | |
| def detect_columns(df: pd.DataFrame) -> dict: | |
| """ | |
| Auto-detect URL, title, month, and total_clicks columns | |
| from any CSV with the expected structure. | |
| Returns a dict with keys: url_col, title_col, month_cols, total_col | |
| """ | |
| cols_lower = {c.lower(): c for c in df.columns} | |
| # URL column β 'Blogs' or any col containing http values | |
| url_col = cols_lower.get("blogs") or cols_lower.get("url") or cols_lower.get("urls") | |
| if not url_col: | |
| for col in df.columns: | |
| sample = df[col].dropna().astype(str).head(5) | |
| if sample.str.startswith("http").any(): | |
| url_col = col | |
| break | |
| # Title column β 'H1' or 'Title' | |
| title_col = cols_lower.get("h1") or cols_lower.get("title") or cols_lower.get("meta title") | |
| # Total clicks column | |
| total_col = cols_lower.get("total clicks") or cols_lower.get("total_clicks") | |
| # Month columns β numeric columns not in the known set | |
| month_cols = [] | |
| for col in df.columns: | |
| if col.lower() in NON_CLICK_COLS: | |
| continue | |
| if pd.api.types.is_numeric_dtype(df[col]) or _is_mostly_numeric(df[col]): | |
| month_cols.append(col) | |
| # Remove total_col from month_cols if accidentally included | |
| if total_col and total_col in month_cols: | |
| month_cols.remove(total_col) | |
| return { | |
| "url_col": url_col, | |
| "title_col": title_col, | |
| "month_cols": month_cols, | |
| "total_col": total_col, | |
| } | |
| def _is_mostly_numeric(series: pd.Series, threshold=0.7) -> bool: | |
| converted = pd.to_numeric(series, errors="coerce") | |
| valid = converted.notna().sum() | |
| return valid / max(len(series), 1) >= threshold | |
| def clean_and_tier(df: pd.DataFrame, col_map: dict) -> pd.DataFrame: | |
| """ | |
| Build a clean analysis dataframe with tier, trend, and slug columns. | |
| """ | |
| url_col = col_map["url_col"] | |
| title_col = col_map["title_col"] | |
| month_cols = col_map["month_cols"] | |
| total_col = col_map["total_col"] | |
| out = pd.DataFrame() | |
| out["url"] = df[url_col].astype(str).str.strip() | |
| out["title"] = df[title_col].astype(str).str.strip() if title_col else out["url"] | |
| out["slug"] = out["url"].apply( | |
| lambda x: x.split("/blog/")[-1] if "/blog/" in x else x.rstrip("/").split("/")[-1] | |
| ) | |
| for col in month_cols: | |
| out[col] = pd.to_numeric(df[col], errors="coerce").fillna(0).astype(int) | |
| if total_col: | |
| out["total_clicks"] = pd.to_numeric(df[total_col], errors="coerce").fillna(0).astype(int) | |
| else: | |
| out["total_clicks"] = out[month_cols].sum(axis=1) | |
| # Trend | |
| out["trend"] = out.apply(lambda r: _get_trend(r, month_cols), axis=1) | |
| # Tier | |
| out["tier"] = out.apply(lambda r: _assign_tier(r, month_cols), axis=1) | |
| # Recommended action | |
| out["action"] = out.apply(lambda r: _get_action(r["tier"], r["total_clicks"]), axis=1) | |
| return out | |
| def _get_trend(row, month_cols): | |
| if len(month_cols) < 2: | |
| return "β‘οΈ Stable" | |
| vals = [row[c] for c in month_cols] | |
| if vals[-1] > vals[0]: | |
| return "π Growing" | |
| elif vals[-1] < vals[0]: | |
| return "π Declining" | |
| return "β‘οΈ Stable" | |
| def _assign_tier(row, month_cols): | |
| c = row["total_clicks"] | |
| max_m = max([row[m] for m in month_cols]) if month_cols else 0 | |
| if c == 0: | |
| return "TAKEDOWN_ZERO" | |
| elif c <= TIER_CONFIG["takedown_low_max"] and max_m <= TIER_CONFIG["takedown_low_monthly"]: | |
| return "TAKEDOWN_LOW" | |
| elif TIER_CONFIG["monitor_min"] <= c <= TIER_CONFIG["monitor_max"]: | |
| return "MONITOR" | |
| elif TIER_CONFIG["performing_min"] <= c <= TIER_CONFIG["performing_max"]: | |
| return "PERFORMING" | |
| elif TIER_CONFIG["strong_min"] <= c <= TIER_CONFIG["strong_max"]: | |
| return "STRONG" | |
| elif c >= TIER_CONFIG["top_min"]: | |
| return "TOP" | |
| return "PERFORMING" | |
| def _get_action(tier, clicks): | |
| actions = { | |
| "TAKEDOWN_ZERO": "Remove immediately. Zero organic traction across all months. Set up 410 or 301 redirect.", | |
| "TAKEDOWN_LOW": "Merge into a stronger related article or remove. Implement 301 redirect.", | |
| "MONITOR": "Optimize meta title, description & keywords. Review in 90 days. Merge if no improvement.", | |
| "PERFORMING": "Refresh content, strengthen internal links, add FAQ schema. Push for top 50 clicks.", | |
| "STRONG": "Update statistics & examples. Add lead gen CTA. Build backlinks to reach 500+ clicks.", | |
| "TOP": "Priority asset. Add lead magnets, improve CTAs, build backlinks. Protect rankings.", | |
| } | |
| if tier == "MONITOR" and clicks >= 15: | |
| return "Good potential. Optimize meta description and add 2β3 internal links to improve CTR." | |
| return actions.get(tier, "Review manually.") | |
| def get_tier_summary(df: pd.DataFrame) -> dict: | |
| counts = df["tier"].value_counts().to_dict() | |
| return { | |
| "TAKEDOWN_ZERO": counts.get("TAKEDOWN_ZERO", 0), | |
| "TAKEDOWN_LOW": counts.get("TAKEDOWN_LOW", 0), | |
| "MONITOR": counts.get("MONITOR", 0), | |
| "PERFORMING": counts.get("PERFORMING", 0), | |
| "STRONG": counts.get("STRONG", 0), | |
| "TOP": counts.get("TOP", 0), | |
| } | |
| TIER_META = { | |
| "TAKEDOWN_ZERO": {"label": "π΄ Take Down (0 clicks)", "color": "#FF4B4B"}, | |
| "TAKEDOWN_LOW": {"label": "π Take Down (1β5 clicks)", "color": "#FF8C00"}, | |
| "MONITOR": {"label": "π‘ Monitor (6β20 clicks)", "color": "#FFC300"}, | |
| "PERFORMING": {"label": "β Performing (21β100 clicks)", "color": "#2ECC71"}, | |
| "STRONG": {"label": "πͺ Strong (101β500 clicks)", "color": "#1ABC9C"}, | |
| "TOP": {"label": "π Top Performers (500+ clicks)","color": "#9B59B6"}, | |
| } | |