Spaces:
Running
Running
| import gradio as gr | |
| import pandas as pd | |
| import numpy as np | |
| import matplotlib | |
| matplotlib.use("Agg") | |
| import matplotlib.pyplot as plt | |
| import matplotlib.dates as mdates | |
| import seaborn as sns | |
| from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer | |
| import statsmodels.api as sm | |
| from itertools import product as iter_product | |
| import warnings | |
| import io | |
| warnings.filterwarnings("ignore") | |
| # ============================================================ | |
| # LOAD DATA | |
| # ============================================================ | |
# Read the three CSV inputs (paths are relative to the working directory).
# The reads are independent of one another, so the bookings frame is loaded
# first and its "month" column converted right next to the read.
df_bookings = pd.read_csv("synthetic_booking_data.csv")
# "month" is later sorted on and used as a time axis, so it must hold real
# datetimes rather than the raw strings from the CSV.
df_bookings["month"] = pd.to_datetime(df_bookings["month"])
df_reviews = pd.read_csv("synthetic_hotel_reviews.csv")
df_feedback = pd.read_csv("hotel_guest_feedback.csv")
| # ============================================================ | |
| # VADER SENTIMENT | |
| # ============================================================ | |
analyzer = SentimentIntensityAnalyzer()

# Compound-score cut-offs: at or above the positive threshold is "positive",
# at or below the negative threshold is "negative", anything between is
# "neutral".
_POSITIVE_THRESHOLD = 0.05
_NEGATIVE_THRESHOLD = -0.05


def _compound_score(text):
    """Return the VADER ``compound`` score for *text* (coerced with ``str``)."""
    return analyzer.polarity_scores(str(text))["compound"]


def _label_from_compound(score):
    """Map a compound score to ``"positive"``, ``"negative"`` or ``"neutral"``."""
    if score >= _POSITIVE_THRESHOLD:
        return "positive"
    if score <= _NEGATIVE_THRESHOLD:
        return "negative"
    return "neutral"


def get_sentiment_label(text):
    """Classify *text* as ``"positive"``, ``"negative"`` or ``"neutral"``.

    Args:
        text: The review text; any value is accepted and converted with
            ``str`` before scoring.

    Returns:
        The sentiment label derived from the VADER compound score.
    """
    return _label_from_compound(_compound_score(text))


# Score every review once and derive the label from that score, instead of
# running the analyzer over each review twice (once per column).
_compound_scores = df_reviews["review_text"].apply(_compound_score)
df_reviews["vader_sentiment"] = _compound_scores.apply(_label_from_compound)
df_reviews["vader_compound"] = _compound_scores

# Missing hotel names are dropped first: NaN cannot be ordered against
# strings, so sorted() would raise TypeError on it.
HOTEL_NAMES = sorted(df_bookings["hotel_name"].dropna().unique().tolist())
| # ============================================================ | |
| # TAB 1: PIPELINE RUNNER | |
| # ============================================================ | |
def run_pipeline(step):
    """Build the execution log for the selected pipeline step.

    Reads the module-level ``df_feedback``, ``df_bookings``, ``df_reviews``
    and ``HOTEL_NAMES``; nothing is recomputed or written back.

    Args:
        step: ``"Step 1: Data Loading"``, ``"Step 2: Sentiment Analysis"``,
            ``"Step 3: Pricing Decisions"`` or ``"Full Pipeline"`` (which
            runs all three in order).

    Returns:
        The log lines joined with newlines. An unrecognised ``step``
        (including ``None``) yields a single explanatory line instead of an
        empty log.
    """
    run_everything = step == "Full Pipeline"
    known_steps = (
        "Step 1: Data Loading",
        "Step 2: Sentiment Analysis",
        "Step 3: Pricing Decisions",
    )
    if not run_everything and step not in known_steps:
        return f"Unknown pipeline step: {step!r}"

    # NOTE(review): the emoji in these messages were mis-encoded in the
    # source text. "➡️" decodes unambiguously; "✅", "📈" and "📉" are
    # best-guess reconstructions -- confirm against the intended UI text.
    log = []

    if run_everything or step == "Step 1: Data Loading":
        log.extend([
            "✅ Step 1: Data loaded successfully",
            f" - Guest feedback: {len(df_feedback)} rows",
            f" - Bookings: {len(df_bookings)} rows",
            f" - Reviews: {len(df_reviews)} rows",
        ])

    if run_everything or step == "Step 2: Sentiment Analysis":
        labels = df_reviews["vader_sentiment"]
        pos = (labels == "positive").sum()
        neu = (labels == "neutral").sum()
        neg = (labels == "negative").sum()
        log.append("✅ Step 2: VADER sentiment analysis complete")
        log.append(f" - Positive: {pos} | Neutral: {neu} | Negative: {neg}")

    if run_everything or step == "Step 3: Pricing Decisions":
        log.append("✅ Step 3: Pricing decisions generated")
        avg_bookings = df_bookings.groupby("hotel_name")["bookings"].mean()

        # Per-hotel review counts by label; a label that never occurs is
        # added as an all-zero column so the ratios below are always defined.
        counts = (
            df_reviews.groupby(["hotel_name", "vader_sentiment"])
            .size()
            .unstack(fill_value=0)
            .reindex(columns=["positive", "neutral", "negative"], fill_value=0)
        )
        totals = counts.sum(axis=1)
        positive_ratio = counts["positive"] / totals
        negative_ratio = counts["negative"] / totals

        for hotel in HOTEL_NAMES:
            # Hotels with no bookings or no reviews fall back to 0, which
            # lands them in the "keep price" branch.
            avg_b = avg_bookings.get(hotel, 0)
            pos_r = positive_ratio.get(hotel, 0)
            neg_r = negative_ratio.get(hotel, 0)
            if avg_b >= 200 and pos_r >= 0.6:
                action = "📈 Increase Price"
            elif avg_b <= 100 and neg_r >= 0.4:
                action = "📉 Decrease Price"
            else:
                action = "➡️ Keep Price"
            log.append(f" {hotel}: {action}")

    return "\n".join(log)
| # ============================================================ | |
| # TAB 2: DASHBOARD | |
| # ============================================================ | |
def _plot_monthly_trend(ax, column, title, ylabel):
    """Draw one line per hotel of ``df_bookings[column]`` against month on *ax*."""
    palette = sns.color_palette("tab10", len(HOTEL_NAMES))
    for color, hotel in zip(palette, HOTEL_NAMES):
        # Sort by month so the line runs chronologically even if the CSV
        # rows are not already ordered.
        rows = df_bookings[df_bookings["hotel_name"] == hotel].sort_values("month")
        ax.plot(rows["month"], rows[column], label=hotel, color=color, linewidth=1.5)
    ax.set_title(title, fontsize=14)
    ax.set_ylabel(ylabel)
    ax.legend(bbox_to_anchor=(1.05, 1), loc="upper left", fontsize=7)
    ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y-%m"))
    ax.tick_params(axis="x", labelrotation=45)


def generate_dashboard(chart_type):
    """Render the requested dashboard chart as a Matplotlib figure.

    Reads the module-level ``df_bookings``, ``df_reviews`` and
    ``HOTEL_NAMES``.

    Args:
        chart_type: ``"Revenue Trends"``, ``"Sentiment Distribution"``,
            ``"Bookings Trends"`` or ``"Correlation Heatmap"``.

    Returns:
        The ``Figure`` holding the chart. Any other ``chart_type`` yields a
        figure that only carries an "Unknown chart type" note.
    """
    fig, ax = plt.subplots(figsize=(12, 6))
    try:
        if chart_type == "Revenue Trends":
            _plot_monthly_trend(
                ax, "revenue", "Monthly Revenue Trends by Hotel", "Revenue (€)"
            )
        elif chart_type == "Bookings Trends":
            _plot_monthly_trend(
                ax, "bookings", "Monthly Bookings by Hotel", "Number of Bookings"
            )
        elif chart_type == "Sentiment Distribution":
            sent_counts = (
                df_reviews.groupby(["hotel_name", "vader_sentiment"])
                .size()
                .unstack(fill_value=0)
                .reindex(columns=["positive", "neutral", "negative"], fill_value=0)
            )
            sent_counts.plot(
                kind="bar", stacked=True, ax=ax,
                color=["#2ecc71", "#f1c40f", "#e74c3c"],
            )
            ax.set_title("Sentiment Distribution by Hotel", fontsize=14)
            ax.set_ylabel("Number of Reviews")
            for tick_label in ax.get_xticklabels():
                tick_label.set_rotation(45)
                tick_label.set_ha("right")
        elif chart_type == "Correlation Heatmap":
            hotel_avg_sent = (
                df_reviews.groupby("hotel_name")["vader_compound"].mean().reset_index()
            )
            hotel_avg_sent.columns = ["hotel_name", "avg_sentiment"]
            booking_summary = df_bookings.groupby("hotel_name").agg(
                avg_bookings=("bookings", "mean"),
                avg_rate=("avg_nightly_rate", "mean"),
                avg_revenue=("revenue", "mean"),
            ).reset_index()
            merged = booking_summary.merge(hotel_avg_sent, on="hotel_name")
            metrics = ["avg_bookings", "avg_rate", "avg_revenue", "avg_sentiment"]
            sns.heatmap(
                merged[metrics].corr(),
                annot=True, cmap="coolwarm", center=0, fmt=".2f", ax=ax,
            )
            ax.set_title("Correlation Heatmap", fontsize=14)
        else:
            ax.text(
                0.5, 0.5, f"Unknown chart type: {chart_type}",
                ha="center", transform=ax.transAxes,
            )
        # Work on this figure/axes directly rather than through pyplot's
        # "current figure", which is shared global state.
        fig.tight_layout()
    finally:
        # pyplot keeps every figure it creates registered until it is closed;
        # without this each button click would leave a figure behind. The
        # returned Figure object itself remains usable for rendering.
        plt.close(fig)
    return fig
| # ============================================================ | |
| # TAB 3: AI DASHBOARD (Hotel Profile + Forecast) | |
| # ============================================================ | |
def _fit_best_arima(series):
    """Grid-search ARIMA orders on *series* and keep the lowest-AIC fit.

    Tries every ``(p, d, q)`` with p in 0..3, d in 0..2 and q in 0..1.

    Returns:
        ``(order, fitted_results)`` for the best model, or ``(None, None)``
        when no order could be fitted.
    """
    best_order, best_fit, best_aic = None, None, float("inf")
    for order in iter_product(range(4), range(3), range(2)):
        try:
            fitted = sm.tsa.ARIMA(series, order=order).fit()
        except Exception:
            # Best effort: an order that cannot be fitted is simply skipped.
            # (Not a bare except, so KeyboardInterrupt/SystemExit propagate.)
            continue
        if fitted.aic < best_aic:
            best_order, best_fit, best_aic = order, fitted, fitted.aic
    return best_order, best_fit


def _forecast_section(ax, hotel_name, revenue_series):
    """Plot history plus a 6-month forecast on *ax*; return the report text.

    Any failure is reported in the returned text and on the axes rather than
    raised, so the profile above the forecast is still shown.
    """
    try:
        best_order, best_model = None, None
        if not revenue_series.empty:
            best_order, best_model = _fit_best_arima(revenue_series)
        if best_model is None:
            ax.text(0.5, 0.5, "Forecast unavailable", ha="center", transform=ax.transAxes)
            return "\n⚠️ Forecast unavailable: no ARIMA model could be fitted"

        forecast = best_model.forecast(steps=6)
        # Step forward one calendar month at a time from the last observed
        # month. This keeps the forecast points on the same day-of-month as
        # the history and avoids the "ME" frequency alias, which only newer
        # pandas releases accept.
        last_month = revenue_series.index[-1]
        forecast_idx = pd.DatetimeIndex(
            [last_month + pd.DateOffset(months=step) for step in range(1, 7)]
        )

        ax.plot(revenue_series.index, revenue_series.values, "b-", label="Historical")
        ax.plot(forecast_idx, forecast.values, "r--", label=f"Forecast ARIMA{best_order}")
        ax.set_title(f"Revenue Forecast — {hotel_name}", fontsize=14)
        ax.set_ylabel("Revenue (€)")
        ax.legend()
        ax.tick_params(axis="x", labelrotation=45)

        parts = [f"\n🔮 FORECAST (ARIMA{best_order}):\n"]
        parts.extend(
            f" {date.strftime('%Y-%m')}: €{val:,.0f}\n"
            for date, val in zip(forecast_idx, forecast.values)
        )
        return "".join(parts)
    except Exception as exc:
        ax.text(0.5, 0.5, "Forecast unavailable", ha="center", transform=ax.transAxes)
        return f"\n⚠️ Forecast error: {exc}"


def hotel_profile(hotel_name):
    """Build the text profile and revenue-forecast chart for one hotel.

    Reads the module-level ``df_reviews`` and ``df_bookings``.

    Args:
        hotel_name: Value matched against the ``hotel_name`` column of both
            data frames.

    Returns:
        ``(report, fig)``: the profile text (sentiment summary, booking
        averages, pricing recommendation and, when a model could be fitted,
        a 6-month revenue forecast) and the Matplotlib ``Figure`` with the
        forecast chart.
    """
    # Sentiment summary
    hotel_revs = df_reviews[df_reviews["hotel_name"] == hotel_name]
    total = len(hotel_revs)
    labels = hotel_revs["vader_sentiment"]
    pos = (labels == "positive").sum()
    neu = (labels == "neutral").sum()
    neg = (labels == "negative").sum()
    avg_comp = hotel_revs["vader_compound"].mean()

    # All three ratios share the same zero-review guard, so a hotel without
    # reviews shows 0% across the board instead of dividing by zero for the
    # neutral share only.
    pos_ratio = pos / total if total > 0 else 0
    neu_ratio = neu / total if total > 0 else 0
    neg_ratio = neg / total if total > 0 else 0

    # Booking summary
    hotel_book = df_bookings[df_bookings["hotel_name"] == hotel_name]
    avg_bookings = hotel_book["bookings"].mean()
    avg_rate = hotel_book["avg_nightly_rate"].mean()
    avg_revenue = hotel_book["revenue"].mean()
    # Report the number of months actually present for this hotel rather
    # than a hard-coded figure.
    months_covered = hotel_book["month"].nunique()

    # Pricing decision
    # NOTE(review): the emoji in this report were mis-encoded in the source
    # text. "➡️", "🏨", "💰", "🔮", "⚠️" and "€" decode unambiguously; "📈",
    # "📉" and "📊" are best-guess reconstructions -- confirm against the
    # intended UI text.
    if avg_bookings >= 200 and pos_ratio >= 0.6:
        action = "📈 INCREASE PRICE"
    elif avg_bookings <= 100 and neg_ratio >= 0.4:
        action = "📉 DECREASE PRICE"
    else:
        action = "➡️ KEEP PRICE"

    report_lines = [
        f"🏨 HOTEL PROFILE: {hotel_name}",
        "=" * 50,
        f"📊 SENTIMENT ({total} reviews):",
        f"Positive: {pos} ({pos_ratio:.0%})",
        f"Neutral: {neu} ({neu_ratio:.0%})",
        f"Negative: {neg} ({neg_ratio:.0%})",
        f"Avg Score: {avg_comp:.3f}",
        f"📈 BOOKINGS ({months_covered} months):",
        f"Avg Monthly Bookings: {avg_bookings:.0f}",
        f"Avg Nightly Rate: €{avg_rate:.0f}",
        f"Avg Monthly Revenue: €{avg_revenue:,.0f}",
        f"💰 PRICING RECOMMENDATION: {action}",
    ]
    report = "\n" + "\n".join(report_lines) + "\n"

    # ARIMA forecast chart
    revenue_series = hotel_book.sort_values("month").set_index("month")["revenue"]
    fig, ax = plt.subplots(figsize=(12, 5))
    try:
        report += _forecast_section(ax, hotel_name, revenue_series)
        fig.tight_layout()
    finally:
        # Deregister the figure from pyplot so repeated clicks do not
        # accumulate open figures; the returned Figure stays renderable.
        plt.close(fig)
    return report, fig
| # ============================================================ | |
| # BUILD GRADIO APP | |
| # ============================================================ | |
# Three-tab UI: each tab pairs a dropdown and a button with the handler
# function that produces its output.
with gr.Blocks(title="Hotel Pricing Optimizer", theme=gr.themes.Soft()) as app:
    gr.Markdown("""
# 🏨 Hotel Pricing Optimizer
*AI-powered luxury hotel pricing and service quality analysis*
""")

    with gr.Tab("Pipeline Runner"):
        step_dropdown = gr.Dropdown(
            choices=[
                "Step 1: Data Loading",
                "Step 2: Sentiment Analysis",
                "Step 3: Pricing Decisions",
                "Full Pipeline",
            ],
            label="Select Step",
            value="Full Pipeline",
        )
        run_btn = gr.Button("Run Pipeline", variant="primary")
        log_output = gr.Textbox(label="Execution Log", lines=20)
        run_btn.click(fn=run_pipeline, inputs=step_dropdown, outputs=log_output)

    with gr.Tab("Dashboard"):
        chart_dropdown = gr.Dropdown(
            choices=[
                "Revenue Trends",
                "Sentiment Distribution",
                "Bookings Trends",
                "Correlation Heatmap",
            ],
            label="Select Chart",
            value="Revenue Trends",
        )
        chart_btn = gr.Button("Generate Chart", variant="primary")
        chart_output = gr.Plot(label="Chart")
        chart_btn.click(fn=generate_dashboard, inputs=chart_dropdown, outputs=chart_output)

    with gr.Tab("AI Dashboard"):
        hotel_dropdown = gr.Dropdown(
            choices=HOTEL_NAMES,
            label="Select Hotel",
            # An empty hotel list must not crash UI construction with an
            # IndexError; start with no selection in that case.
            value=HOTEL_NAMES[0] if HOTEL_NAMES else None,
        )
        profile_btn = gr.Button("Generate Hotel Profile & Forecast", variant="primary")
        profile_text = gr.Textbox(label="Hotel Profile", lines=25)
        forecast_plot = gr.Plot(label="Revenue Forecast")
        profile_btn.click(
            fn=hotel_profile,
            inputs=hotel_dropdown,
            outputs=[profile_text, forecast_plot],
        )

app.launch()