import importlib.util
import logging

import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

logger = logging.getLogger(__name__)


class RealEstateAnalytics:
    """Streamlit analytics dashboard over the property metadata of a search system.

    The search system only needs to expose ``collection.get(include=[...])``
    returning a mapping with a ``'metadatas'`` list of per-property dicts
    (presumably a ChromaDB-style collection -- confirm against the caller).
    """

    # Labels for the five price quintiles, ordered cheapest to most expensive.
    _PRICE_SEGMENTS = ['Entry', 'Mid-Low', 'Mid', 'Mid-High', 'Luxury']

    # Non-numeric floor codes and the numeric level each one stands for.
    _FLOOR_CODES = {'G': '0', 'B': '-1', 'M': '1', 'P': '2'}

    def __init__(self, search_system):
        """Store the search system and the colour palette shared by the charts."""
        self.search_system = search_system
        self.colors = {
            'primary': ['#2E3B4E', '#405D8C', '#557ABC', '#7295D7', '#9FB5E6'],
            'accent': '#FF6B6B',
            'background': '#FFFFFF',
            'grid': '#F0F2F6'
        }

    def enhance_figure(self, fig):
        """Apply consistent styling to a plotly figure and return the same figure."""
        fig.update_layout(
            plot_bgcolor=self.colors['background'],
            paper_bgcolor=self.colors['background'],
            font_family="Arial",
            title_font_size=20,
            title_x=0.5,
            legend_title_font_size=14,
            legend_font_size=12,
            showlegend=True
        )
        fig.update_xaxes(gridcolor=self.colors['grid'])
        fig.update_yaxes(gridcolor=self.colors['grid'])
        return fig

    @staticmethod
    def _parse_property(metadata) -> dict:
        """Convert one metadata record into a DataFrame row dict.

        Raises KeyError / TypeError / ValueError / AttributeError when a
        field is missing or cannot be parsed; the caller decides what to do.
        """
        # Numeric fields may arrive as strings with thousands separators.
        price = float(str(metadata['price']).replace(',', ''))
        area = float(str(metadata['total_area']).replace(',', ''))
        return {
            'UnitCode': str(metadata['unit_code']),
            'UnitType': metadata['unit_type'].title(),
            'Floor': str(metadata['floor']),
            'Developer': metadata['developer'].title(),
            'TotalArea': area,
            'AskingPrice': price,
            # Guard against division by zero for records without an area.
            'PricePerSqft': round(price / area, 2) if area > 0 else 0,
            'View': metadata['view'].title(),
            'Source': metadata['source_file'],
            'IndexedDate': pd.to_datetime(metadata['indexed_date'])
        }

    @classmethod
    def _price_segments(cls, prices: pd.Series) -> pd.Series:
        """Assign each price to one of the five quintile segments.

        Plain quantile binning fails when tied prices make bin edges
        coincide, so fall back to binning on first-occurrence ranks (which
        are always distinct), and finally to a single middle segment when
        there are too few rows to form quantiles at all.
        """
        labels = cls._PRICE_SEGMENTS
        try:
            return pd.qcut(prices, q=len(labels), labels=labels)
        except ValueError:
            pass
        try:
            return pd.qcut(prices.rank(method='first'), q=len(labels), labels=labels)
        except ValueError:
            middle = labels[len(labels) // 2]
            return pd.Series(
                pd.Categorical([middle] * len(prices), categories=labels, ordered=True),
                index=prices.index
            )

    @classmethod
    def _normalize_floor(cls, floors: pd.Series) -> pd.Series:
        """Convert floor labels to numbers; unrecognised labels become NaN.

        Codes are matched as whole values (after trimming and upper-casing),
        never as substrings, so a label such as 'B1' is reported as unknown
        instead of being rewritten to '-11'.
        """
        codes = floors.astype(str).str.strip().str.upper()
        return pd.to_numeric(codes.replace(cls._FLOOR_CODES), errors='coerce')

    def get_all_properties(self) -> pd.DataFrame:
        """Fetch all properties and convert to DataFrame.

        Malformed records are skipped (and counted in the log) rather than
        aborting the load. Any failure talking to the collection is shown
        via ``st.error`` and an empty DataFrame is returned.
        """
        try:
            # Get all documents instead of using search
            results = self.search_system.collection.get(include=['metadatas'])

            if not results or not results['metadatas']:
                return pd.DataFrame()

            data = []
            skipped = 0
            for metadata in results['metadatas']:
                try:
                    data.append(self._parse_property(metadata))
                except (KeyError, TypeError, ValueError, AttributeError, OverflowError):
                    # Best effort: one bad record must not hide the rest.
                    skipped += 1

            if skipped:
                logger.warning("Skipped %d malformed property record(s)", skipped)

            return pd.DataFrame(data)

        except Exception as e:
            st.error(f"Error fetching property data: {str(e)}")
            return pd.DataFrame()

    def render_analytics_dashboard(self):
        """Render the dashboard title and the four analysis tabs."""
        st.title("📊 Real Estate Analytics Hub")

        # Get and prepare data
        df = self.get_all_properties()

        if df.empty:
            st.warning("No data available for analysis")
            return

        # Create analysis tabs
        tabs = st.tabs([
            "💰 Price Analysis",
            "🌡️ Market Heatmap",
            "🎯 Portfolio Breakdown",
            "📈 Time Analysis"
        ])

        with tabs[0]:
            self.render_price_analysis(df)

        with tabs[1]:
            self.render_market_heatmap(df)

        with tabs[2]:
            self.render_property_sunburst(df)

        with tabs[3]:
            self.render_time_analysis(df)

    def render_summary_stats(self, df: pd.DataFrame):
        """Render summary statistics cards."""
        col1, col2, col3, col4 = st.columns(4)

        with col1:
            st.metric(
                "Total Properties",
                f"{len(df):,}",
                f"{df['UnitType'].nunique(dropna=False)} types"
            )

        with col2:
            total_value = df['AskingPrice'].sum()
            st.metric(
                "Total Value",
                f"{total_value/1e9:.2f}B AED",
                f"Avg: {df['AskingPrice'].mean()/1e6:.1f}M"
            )

        with col3:
            avg_price_sqft = df['PricePerSqft'].mean()
            # The sample std of a single row is NaN; show 0 instead of "±nan".
            price_sqft_std = df['PricePerSqft'].std()
            if pd.isna(price_sqft_std):
                price_sqft_std = 0
            st.metric(
                "Avg Price/Sqft",
                f"{avg_price_sqft:,.0f} AED",
                f"±{price_sqft_std:,.0f}"
            )

        with col4:
            st.metric(
                "Developers",
                f"{df['Developer'].nunique()}",
                f"{df['View'].nunique()} views"
            )

    def render_price_analysis(self, df: pd.DataFrame):
        """Render the price-segment violin plot and the price/sqft scatter."""
        st.subheader("Advanced Price Analysis")

        col1, col2 = st.columns(2)

        with col1:
            # Work on a derived frame so the caller's DataFrame is untouched.
            segmented = df.assign(PriceCategory=self._price_segments(df['AskingPrice']))

            fig = px.violin(segmented, x='PriceCategory', y='AskingPrice',
                            box=True, points="all",
                            color='PriceCategory',
                            title='Price Distribution by Segment',
                            # Keep segments in cheapest-to-dearest order.
                            category_orders={'PriceCategory': self._PRICE_SEGMENTS},
                            color_discrete_sequence=self.colors['primary'])
            st.plotly_chart(self.enhance_figure(fig), use_container_width=True)

        with col2:
            # Price per Sqft vs Total Area Scatter
            fig = px.scatter(df, x='TotalArea', y='PricePerSqft',
                             color='UnitType', size='AskingPrice',
                             hover_data=['Developer', 'View'],
                             title='Price/Sqft vs Area Analysis',
                             color_discrete_sequence=self.colors['primary'])
            st.plotly_chart(self.enhance_figure(fig), use_container_width=True)

    def render_market_heatmap(self, df: pd.DataFrame):
        """Render a heatmap of mean price/sqft by unit type and view."""
        st.subheader("Market Heatmap Analysis")

        # Create price heatmap by property type and view
        pivot_data = df.pivot_table(
            values='PricePerSqft',
            index='UnitType',
            columns='View',
            aggfunc='mean'
        ).round(0)

        fig = go.Figure(data=go.Heatmap(
            z=pivot_data.values,
            x=pivot_data.columns,
            y=pivot_data.index,
            colorscale=self.colors['primary'],
            text=pivot_data.values.round(0),
            texttemplate='%{text:,.0f}',
            textfont={'size': 10},
            hoverongaps=False
        ))

        fig.update_layout(
            title='Price/Sqft Heatmap by Property Type and View',
            xaxis_title='View',
            yaxis_title='Property Type'
        )
        st.plotly_chart(self.enhance_figure(fig), use_container_width=True)

    def render_property_sunburst(self, df: pd.DataFrame):
        """Render a Developer > UnitType > View sunburst sized by asking price."""
        st.subheader("Property Portfolio Breakdown")

        fig = px.sunburst(
            df,
            path=['Developer', 'UnitType', 'View'],
            values='AskingPrice',
            color='PricePerSqft',
            color_continuous_scale=self.colors['primary'],
            title='Portfolio Hierarchy Analysis'
        )
        st.plotly_chart(self.enhance_figure(fig), use_container_width=True)

    def render_time_analysis(self, df: pd.DataFrame):
        """Render monthly average price (line) with listing volume (bars)."""
        st.subheader("Temporal Market Analysis")

        # Derive the month key as a standalone Series so df is not mutated.
        months = pd.to_datetime(df['IndexedDate']).dt.to_period('M').rename('Month')
        time_data = (
            df.groupby(months)['AskingPrice']
            .agg(AvgPrice='mean', Volume='count')
            .reset_index()
        )
        # Periods are not plottable directly; use their string form on the axis.
        time_data['Month'] = time_data['Month'].astype(str)

        # Create combined trend analysis
        fig = go.Figure()

        # Add price trend
        fig.add_trace(go.Scatter(
            x=time_data['Month'],
            y=time_data['AvgPrice'],
            name='Avg Price',
            line=dict(color=self.colors['primary'][0]),
            yaxis='y'
        ))

        # Add volume bars
        fig.add_trace(go.Bar(
            x=time_data['Month'],
            y=time_data['Volume'],
            name='Volume',
            marker_color=self.colors['accent'],
            opacity=0.3,
            yaxis='y2'
        ))

        # Update layout with dual axes
        fig.update_layout(
            title='Price Trends with Trading Volume',
            yaxis=dict(title='Average Price (AED)'),
            yaxis2=dict(title='Number of Properties', overlaying='y', side='right'),
            hovermode='x unified'
        )
        st.plotly_chart(self.enhance_figure(fig), use_container_width=True)

    def render_property_type_analysis(self, df: pd.DataFrame):
        """Render property type analysis section."""
        st.subheader("Property Type Analysis")

        col1, col2 = st.columns(2)

        with col1:
            # Property Type Distribution
            type_dist = df['UnitType'].value_counts()
            fig_types = px.pie(
                values=type_dist.values,
                names=type_dist.index,
                title='Property Type Distribution',
                hole=0.4,
                color_discrete_sequence=px.colors.sequential.Blues_r
            )
            fig_types.update_layout(plot_bgcolor='#f0f2f6', paper_bgcolor='#f0f2f6')
            st.plotly_chart(fig_types, use_container_width=True)

        with col2:
            # Average Price by Type
            avg_price_type = df.groupby('UnitType')['AskingPrice'].mean().sort_values(ascending=True)
            fig_avg_price = px.bar(
                x=avg_price_type.values,
                y=avg_price_type.index,
                orientation='h',
                title='Average Price by Property Type',
                labels={'x': 'Average Price (AED)', 'y': 'Property Type'},
                color_discrete_sequence=['#4a90e2']
            )
            fig_avg_price.update_layout(plot_bgcolor='#f0f2f6', paper_bgcolor='#f0f2f6')
            st.plotly_chart(fig_avg_price, use_container_width=True)

        # Type Summary Table
        type_summary = df.groupby('UnitType').agg({
            'UnitCode': 'count',
            'AskingPrice': ['mean', 'min', 'max'],
            'TotalArea': 'mean',
            'PricePerSqft': 'mean'
        }).round(2)

        type_summary.columns = ['Count', 'Avg Price', 'Min Price', 'Max Price', 'Avg Area', 'Avg Price/Sqft']
        st.dataframe(type_summary, use_container_width=True)

    def render_floor_analysis(self, df: pd.DataFrame):
        """Render floor analysis section."""
        st.subheader("Floor Distribution Analysis")

        # Normalise on a derived frame so the caller's DataFrame is untouched.
        floor_df = df.assign(NormalizedFloor=self._normalize_floor(df['Floor']))

        col1, col2 = st.columns(2)

        with col1:
            # Floor Distribution
            floor_dist = floor_df['NormalizedFloor'].value_counts().sort_index()
            fig_floor = px.bar(
                x=floor_dist.index,
                y=floor_dist.values,
                title='Floor Distribution',
                labels={'x': 'Floor', 'y': 'Count'},
                color_discrete_sequence=['#4a90e2']
            )
            fig_floor.update_layout(plot_bgcolor='#f0f2f6', paper_bgcolor='#f0f2f6')
            st.plotly_chart(fig_floor, use_container_width=True)

        with col2:
            # Plotly's OLS trendline needs statsmodels; draw the plain
            # scatter when it is not installed instead of failing.
            trendline = "ols" if importlib.util.find_spec("statsmodels") else None
            fig_price_floor = px.scatter(
                floor_df,
                x='NormalizedFloor',
                y='AskingPrice',
                title='Price vs Floor Level',
                labels={'NormalizedFloor': 'Floor', 'AskingPrice': 'Price (AED)'},
                trendline=trendline,
                color_discrete_sequence=['#2ecc71']
            )
            fig_price_floor.update_layout(plot_bgcolor='#f0f2f6', paper_bgcolor='#f0f2f6')
            st.plotly_chart(fig_price_floor, use_container_width=True)

        # Floor Level Premium Analysis
        filled_floors = floor_df['NormalizedFloor'].fillna(0)
        if filled_floors.nunique() > 1:
            # Floors repeat heavily, so quantile edges often coincide;
            # merge the duplicate edges rather than raising.
            floor_bands = pd.qcut(filled_floors, q=5, duplicates='drop')
        else:
            # A single distinct floor cannot be split into quantile bands.
            floor_bands = filled_floors

        floor_summary = floor_df.groupby(floor_bands, observed=False).agg({
            'UnitCode': 'count',
            'AskingPrice': 'mean',
            'PricePerSqft': 'mean'
        }).round(2)

        floor_summary.columns = ['Count', 'Avg Price', 'Avg Price/Sqft']
        st.subheader("Floor Level Premium Analysis")
        st.dataframe(floor_summary, use_container_width=True)

    def render_developer_analysis(self, df: pd.DataFrame):
        """Render developer analysis section."""
        st.subheader("Developer Analysis")

        col1, col2 = st.columns(2)

        with col1:
            # Developer Market Share
            dev_share = df.groupby('Developer')['UnitCode'].count().sort_values(ascending=True)
            fig_dev = px.bar(
                x=dev_share.values,
                y=dev_share.index,
                orientation='h',
                title='Developer Market Share',
                labels={'x': 'Number of Properties', 'y': 'Developer'},
                color_discrete_sequence=['#4a90e2']
            )
            fig_dev.update_layout(plot_bgcolor='#f0f2f6', paper_bgcolor='#f0f2f6')
            st.plotly_chart(fig_dev, use_container_width=True)

        with col2:
            # Average Price by Developer
            avg_price_dev = df.groupby('Developer')['PricePerSqft'].mean().sort_values(ascending=True)
            fig_dev_price = px.bar(
                x=avg_price_dev.values,
                y=avg_price_dev.index,
                orientation='h',
                title='Average Price/Sqft by Developer',
                labels={'x': 'Average Price/Sqft (AED)', 'y': 'Developer'},
                color_discrete_sequence=['#2ecc71']
            )
            fig_dev_price.update_layout(plot_bgcolor='#f0f2f6', paper_bgcolor='#f0f2f6')
            st.plotly_chart(fig_dev_price, use_container_width=True)

        # Developer Performance Metrics
        dev_metrics = df.groupby('Developer', observed=False).agg({
            'UnitCode': 'count',
            'AskingPrice': ['mean', 'min', 'max'],
            'TotalArea': 'mean',
            'PricePerSqft': ['mean', 'std']
        }).round(2)

        dev_metrics.columns = ['Properties', 'Avg Price', 'Min Price', 'Max Price',
                               'Avg Area', 'Avg Price/Sqft', 'Price/Sqft Std']
        st.dataframe(dev_metrics, use_container_width=True)

    def render_location_insights(self, df: pd.DataFrame):
        """Render location and view analysis section."""
        st.subheader("Location and View Analysis")

        col1, col2 = st.columns(2)

        with col1:
            # View Distribution
            view_dist = df['View'].value_counts()
            fig_view = px.pie(
                values=view_dist.values,
                names=view_dist.index,
                title='View Distribution',
                hole=0.4,
                color_discrete_sequence=px.colors.sequential.Blues_r
            )
            fig_view.update_layout(plot_bgcolor='#f0f2f6', paper_bgcolor='#f0f2f6')
            st.plotly_chart(fig_view, use_container_width=True)

        with col2:
            # Price Premium by View
            avg_price_view = df.groupby('View')['PricePerSqft'].mean().sort_values(ascending=True)
            fig_view_price = px.bar(
                x=avg_price_view.values,
                y=avg_price_view.index,
                orientation='h',
                title='Average Price/Sqft by View',
                labels={'x': 'Average Price/Sqft (AED)', 'y': 'View'},
                color_discrete_sequence=['#4a90e2']
            )
            fig_view_price.update_layout(plot_bgcolor='#f0f2f6', paper_bgcolor='#f0f2f6')
            st.plotly_chart(fig_view_price, use_container_width=True)

        # View Analysis Table
        view_analysis = df.groupby('View', observed=False).agg({
            'UnitCode': 'count',
            'AskingPrice': ['mean', 'min', 'max'],
            'PricePerSqft': ['mean', 'std']
        }).round(2)

        view_analysis.columns = ['Count', 'Avg Price', 'Min Price', 'Max Price',
                                 'Avg Price/Sqft', 'Price/Sqft Std']
        st.dataframe(view_analysis, use_container_width=True)

    def render_market_trends(self, df: pd.DataFrame):
        """Render market trends analysis section."""
        st.subheader("Market Trends Analysis")

        # Derive the month key as a standalone Series so df is not mutated.
        months = pd.to_datetime(df['IndexedDate']).dt.to_period('M').rename('Month')

        # Group by month and calculate average price and listing count
        monthly_trends = df.groupby(months).agg({
            'AskingPrice': 'mean',
            'UnitCode': 'count'
        }).reset_index()

        monthly_trends['Month'] = monthly_trends['Month'].dt.to_timestamp()

        # Plot price trends
        fig_trends = px.line(
            monthly_trends,
            x='Month',
            y='AskingPrice',
            title='Average Property Price Over Time',
            labels={'AskingPrice': 'Average Price (AED)', 'Month': 'Month'},
            color_discrete_sequence=['#4a90e2']
        )
        fig_trends.update_layout(plot_bgcolor='#f0f2f6', paper_bgcolor='#f0f2f6')
        st.plotly_chart(fig_trends, use_container_width=True)

        # Plot property count trends
        fig_count = px.line(
            monthly_trends,
            x='Month',
            y='UnitCode',
            title='Number of Properties Listed Over Time',
            labels={'UnitCode': 'Number of Properties', 'Month': 'Month'},
            color_discrete_sequence=['#2ecc71']
        )
        fig_count.update_layout(plot_bgcolor='#f0f2f6', paper_bgcolor='#f0f2f6')
        st.plotly_chart(fig_count, use_container_width=True)


def render_analytics_tab(search_system):
    """Main function to render the analytics tab."""
    analytics = RealEstateAnalytics(search_system)
    analytics.render_analytics_dashboard()