Spaces:
Sleeping
Sleeping
import streamlit as st | |
import pandas as pd | |
import plotly.express as px | |
import plotly.graph_objects as go | |
class RealEstateAnalytics:
    """Streamlit analytics dashboard built on top of a property search index.

    The class pulls every stored metadata record from
    ``search_system.collection``, converts the records into a pandas
    DataFrame and renders a set of Plotly charts and summary tables with
    Streamlit.  Render methods never modify the DataFrame they are given;
    derived columns are computed on private copies.
    """

    # Quantile segment names used by the price-distribution chart, ordered
    # from the cheapest fifth of the listings to the most expensive fifth.
    _PRICE_SEGMENTS = ['Entry', 'Mid-Low', 'Mid', 'Mid-High', 'Luxury']

    # Non-numeric floor labels and the numeric level they are plotted at.
    # NOTE(review): presumably Ground / Basement / Mezzanine / Podium --
    # confirm the intended levels with the data owner.
    _FLOOR_ALIASES = {'G': '0', 'B': '-1', 'M': '1', 'P': '2'}

    def __init__(self, search_system):
        """Store the search backend and the shared colour palette.

        Args:
            search_system: Object exposing ``collection.get(include=[...])``
                which returns a mapping with a ``'metadatas'`` list.
        """
        self.search_system = search_system
        self.colors = {
            'primary': ['#2E3B4E', '#405D8C', '#557ABC', '#7295D7', '#9FB5E6'],
            'accent': '#FF6B6B',
            'background': '#FFFFFF',
            'grid': '#F0F2F6',
        }

    # ------------------------------------------------------------------
    # Data preparation helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _property_row(metadata) -> dict:
        """Convert one raw metadata record into a dashboard row.

        Raises whatever the conversions raise (missing key, non-numeric
        price/area, non-string text field, unparsable date); the caller
        decides how to treat bad records.
        """
        # Numeric fields may arrive as strings with thousands separators.
        price = float(str(metadata['price']).replace(',', ''))
        area = float(str(metadata['total_area']).replace(',', ''))
        return {
            'UnitCode': str(metadata['unit_code']),
            'UnitType': metadata['unit_type'].title(),
            'Floor': str(metadata['floor']),
            'Developer': metadata['developer'].title(),
            'TotalArea': area,
            'AskingPrice': price,
            # Guard against division by zero for records without an area.
            'PricePerSqft': round(price / area, 2) if area > 0 else 0,
            'View': metadata['view'].title(),
            'Source': metadata['source_file'],
            'IndexedDate': pd.to_datetime(metadata['indexed_date']),
        }

    @classmethod
    def _price_segments(cls, prices: pd.Series) -> pd.Series:
        """Assign each price to one of five ordered quantile segments.

        ``pd.qcut`` is used when it succeeds.  It raises ``ValueError`` when
        tied prices make two quantile edges equal; in that case the prices
        are ranked (ties broken by row order) and the ranks are split into
        five equal-sized buckets, so every non-missing price still receives
        a segment.
        """
        labels = cls._PRICE_SEGMENTS
        try:
            return pd.qcut(prices, q=len(labels), labels=labels)
        except ValueError:
            ranks = prices.rank(method='first')
            # rank r in 1..n  ->  bucket floor((r - 1) * k / n) in 0..k-1
            buckets = (ranks - 1) * len(labels) // prices.count()
            codes = buckets.fillna(-1).astype(int).to_numpy()
            segments = pd.Categorical.from_codes(
                codes, categories=labels, ordered=True
            )
            return pd.Series(segments, index=prices.index, name=prices.name)

    @classmethod
    def _normalize_floors(cls, floors: pd.Series) -> pd.Series:
        """Turn floor labels into numbers.

        Labels are trimmed and upper-cased, whole-label aliases from
        ``_FLOOR_ALIASES`` are substituted, and everything is parsed as a
        number.  Labels that are neither an alias nor numeric become NaN.
        Matching is on the whole label (not on substrings) so that a label
        such as ``'B2'`` is not rewritten into the bogus number ``-12``.
        """
        labels = floors.astype(str).str.strip().str.upper()
        return pd.to_numeric(labels.replace(cls._FLOOR_ALIASES), errors='coerce')

    def enhance_figure(self, fig):
        """Apply consistent styling to plotly figures and return the figure."""
        fig.update_layout(
            plot_bgcolor=self.colors['background'],
            paper_bgcolor=self.colors['background'],
            font_family="Arial",
            title_font_size=20,
            title_x=0.5,
            legend_title_font_size=14,
            legend_font_size=12,
            showlegend=True,
        )
        fig.update_xaxes(gridcolor=self.colors['grid'])
        fig.update_yaxes(gridcolor=self.colors['grid'])
        return fig

    def get_all_properties(self) -> pd.DataFrame:
        """Fetch all properties and convert to DataFrame.

        Returns:
            One row per successfully converted metadata record, or an empty
            DataFrame when the collection has no metadata or the fetch
            itself fails (the failure is reported with ``st.error``).
        """
        try:
            # Get all documents instead of using search
            results = self.search_system.collection.get(include=['metadatas'])
            if not results or not results['metadatas']:
                return pd.DataFrame()

            data = []
            for metadata in results['metadatas']:
                try:
                    data.append(self._property_row(metadata))
                except Exception:
                    # Deliberately best effort: one malformed record must
                    # not take the whole dashboard down, so it is skipped.
                    continue
            return pd.DataFrame(data)
        except Exception as e:
            st.error(f"Error fetching property data: {str(e)}")
            return pd.DataFrame()

    # ------------------------------------------------------------------
    # Dashboard entry point
    # ------------------------------------------------------------------
    def render_analytics_dashboard(self):
        """Render the dashboard title and the four analysis tabs."""
        st.title("📊 Real Estate Analytics Hub")

        # Get and prepare data
        df = self.get_all_properties()
        if df.empty:
            st.warning("No data available for analysis")
            return

        # Create analysis tabs
        tabs = st.tabs([
            "💰 Price Analysis",
            "🌡️ Market Heatmap",
            "🎯 Portfolio Breakdown",
            "📈 Time Analysis",
        ])
        with tabs[0]:
            self.render_price_analysis(df)
        with tabs[1]:
            self.render_market_heatmap(df)
        with tabs[2]:
            self.render_property_sunburst(df)
        with tabs[3]:
            self.render_time_analysis(df)

    def render_summary_stats(self, df: pd.DataFrame):
        """Render summary statistics cards."""
        col1, col2, col3, col4 = st.columns(4)
        with col1:
            st.metric(
                "Total Properties",
                f"{len(df):,}",
                f"{len(df.UnitType.unique())} types",
            )
        with col2:
            total_value = df['AskingPrice'].sum()
            st.metric(
                "Total Value",
                f"{total_value/1e9:.2f}B AED",
                f"Avg: {df['AskingPrice'].mean()/1e6:.1f}M",
            )
        with col3:
            avg_price_sqft = df['PricePerSqft'].mean()
            st.metric(
                "Avg Price/Sqft",
                f"{avg_price_sqft:,.0f} AED",
                f"±{df['PricePerSqft'].std():,.0f}",
            )
        with col4:
            st.metric(
                "Developers",
                f"{df['Developer'].nunique()}",
                f"{df['View'].nunique()} views",
            )

    # ------------------------------------------------------------------
    # Tabs used by the dashboard
    # ------------------------------------------------------------------
    def render_price_analysis(self, df: pd.DataFrame):
        """Render the price-segment violin plot and the price/area scatter."""
        st.subheader("Advanced Price Analysis")
        col1, col2 = st.columns(2)
        with col1:
            # Enhanced Price Distribution with Price Categories.
            # The segment column lives on a copy so the caller's frame
            # (shared by the other tabs) is left untouched.
            segmented = df.assign(
                PriceCategory=self._price_segments(df['AskingPrice'])
            )
            fig = px.violin(
                segmented,
                x='PriceCategory',
                y='AskingPrice',
                box=True,
                points="all",
                color='PriceCategory',
                title='Price Distribution by Segment',
                color_discrete_sequence=self.colors['primary'],
            )
            st.plotly_chart(self.enhance_figure(fig), use_container_width=True)
        with col2:
            # Price per Sqft vs Total Area Scatter
            fig = px.scatter(
                df,
                x='TotalArea',
                y='PricePerSqft',
                color='UnitType',
                size='AskingPrice',
                hover_data=['Developer', 'View'],
                title='Price/Sqft vs Area Analysis',
                color_discrete_sequence=self.colors['primary'],
            )
            st.plotly_chart(self.enhance_figure(fig), use_container_width=True)

    def render_market_heatmap(self, df: pd.DataFrame):
        """Render mean price per sqft as a unit-type by view heatmap."""
        st.subheader("Market Heatmap Analysis")
        # Create price heatmap by property type and view
        pivot_data = df.pivot_table(
            values='PricePerSqft',
            index='UnitType',
            columns='View',
            aggfunc='mean',
        ).round(0)
        fig = go.Figure(data=go.Heatmap(
            z=pivot_data.values,
            x=pivot_data.columns,
            y=pivot_data.index,
            colorscale=self.colors['primary'],
            text=pivot_data.values.round(0),
            texttemplate='%{text:,.0f}',
            textfont={'size': 10},
            hoverongaps=False,
        ))
        fig.update_layout(
            title='Price/Sqft Heatmap by Property Type and View',
            xaxis_title='View',
            yaxis_title='Property Type',
        )
        st.plotly_chart(self.enhance_figure(fig), use_container_width=True)

    def render_property_sunburst(self, df: pd.DataFrame):
        """Render a developer > unit type > view sunburst sized by price."""
        st.subheader("Property Portfolio Breakdown")
        fig = px.sunburst(
            df,
            path=['Developer', 'UnitType', 'View'],
            values='AskingPrice',
            color='PricePerSqft',
            color_continuous_scale=self.colors['primary'],
            title='Portfolio Hierarchy Analysis',
        )
        st.plotly_chart(self.enhance_figure(fig), use_container_width=True)

    def render_time_analysis(self, df: pd.DataFrame):
        """Render monthly average price (line) with listing counts (bars)."""
        st.subheader("Temporal Market Analysis")
        # Prepare time series data.  Grouping by a standalone month series
        # avoids writing a 'Month' column into the caller's frame, and the
        # list aggregation on a single column keeps the result columns flat.
        months = pd.to_datetime(df['IndexedDate']).dt.to_period('M').rename('Month')
        time_data = (
            df.groupby(months)['AskingPrice']
            .agg(['mean', 'count'])
            .reset_index()
        )
        time_data['Month'] = time_data['Month'].astype(str)

        # Create combined trend analysis
        fig = go.Figure()
        # Add price trend
        fig.add_trace(go.Scatter(
            x=time_data['Month'],
            y=time_data['mean'],
            name='Avg Price',
            line=dict(color=self.colors['primary'][0]),
            yaxis='y',
        ))
        # Add volume bars
        fig.add_trace(go.Bar(
            x=time_data['Month'],
            y=time_data['count'],
            name='Volume',
            marker_color=self.colors['accent'],
            opacity=0.3,
            yaxis='y2',
        ))
        # Update layout with dual axes
        fig.update_layout(
            title='Price Trends with Trading Volume',
            yaxis=dict(title='Average Price (AED)'),
            yaxis2=dict(title='Number of Properties', overlaying='y', side='right'),
            hovermode='x unified',
        )
        st.plotly_chart(self.enhance_figure(fig), use_container_width=True)

    # ------------------------------------------------------------------
    # Additional sections (not wired into render_analytics_dashboard)
    # ------------------------------------------------------------------
    def render_property_type_analysis(self, df: pd.DataFrame):
        """Render property type analysis section."""
        st.subheader("Property Type Analysis")
        col1, col2 = st.columns(2)
        with col1:
            # Property Type Distribution
            type_dist = df['UnitType'].value_counts()
            fig_types = px.pie(
                values=type_dist.values,
                names=type_dist.index,
                title='Property Type Distribution',
                hole=0.4,
                color_discrete_sequence=px.colors.sequential.Blues_r,
            )
            fig_types.update_layout(plot_bgcolor='#f0f2f6', paper_bgcolor='#f0f2f6')
            st.plotly_chart(fig_types, use_container_width=True)
        with col2:
            # Average Price by Type
            avg_price_type = (
                df.groupby('UnitType')['AskingPrice'].mean().sort_values(ascending=True)
            )
            fig_avg_price = px.bar(
                x=avg_price_type.values,
                y=avg_price_type.index,
                orientation='h',
                title='Average Price by Property Type',
                labels={'x': 'Average Price (AED)', 'y': 'Property Type'},
                color_discrete_sequence=['#4a90e2'],
            )
            fig_avg_price.update_layout(plot_bgcolor='#f0f2f6', paper_bgcolor='#f0f2f6')
            st.plotly_chart(fig_avg_price, use_container_width=True)

        # Type Summary Table
        type_summary = df.groupby('UnitType').agg({
            'UnitCode': 'count',
            'AskingPrice': ['mean', 'min', 'max'],
            'TotalArea': 'mean',
            'PricePerSqft': 'mean',
        }).round(2)
        type_summary.columns = [
            'Count', 'Avg Price', 'Min Price', 'Max Price', 'Avg Area', 'Avg Price/Sqft',
        ]
        st.dataframe(type_summary, use_container_width=True)

    def render_floor_analysis(self, df: pd.DataFrame):
        """Render floor analysis section."""
        st.subheader("Floor Distribution Analysis")
        # Normalize floor values on a copy; the caller's frame is not changed.
        floor_df = df.assign(NormalizedFloor=self._normalize_floors(df['Floor']))

        col1, col2 = st.columns(2)
        with col1:
            # Floor Distribution
            floor_dist = floor_df['NormalizedFloor'].value_counts().sort_index()
            fig_floor = px.bar(
                x=floor_dist.index,
                y=floor_dist.values,
                title='Floor Distribution',
                labels={'x': 'Floor', 'y': 'Count'},
                color_discrete_sequence=['#4a90e2'],
            )
            fig_floor.update_layout(plot_bgcolor='#f0f2f6', paper_bgcolor='#f0f2f6')
            st.plotly_chart(fig_floor, use_container_width=True)
        with col2:
            # Price vs Floor
            fig_price_floor = px.scatter(
                floor_df,
                x='NormalizedFloor',
                y='AskingPrice',
                title='Price vs Floor Level',
                labels={'NormalizedFloor': 'Floor', 'AskingPrice': 'Price (AED)'},
                trendline="ols",
                color_discrete_sequence=['#2ecc71'],
            )
            fig_price_floor.update_layout(plot_bgcolor='#f0f2f6', paper_bgcolor='#f0f2f6')
            st.plotly_chart(fig_price_floor, use_container_width=True)

        # Floor Level Premium Analysis.
        # Many units share a floor, so quantile edges frequently coincide;
        # duplicates='drop' merges such bands instead of raising ValueError.
        floor_bands = pd.qcut(
            floor_df['NormalizedFloor'].fillna(0), q=5, duplicates='drop'
        )
        floor_summary = floor_df.groupby(floor_bands, observed=False).agg({
            'UnitCode': 'count',
            'AskingPrice': 'mean',
            'PricePerSqft': 'mean',
        }).round(2)
        floor_summary.columns = ['Count', 'Avg Price', 'Avg Price/Sqft']
        st.subheader("Floor Level Premium Analysis")
        st.dataframe(floor_summary, use_container_width=True)

    def render_developer_analysis(self, df: pd.DataFrame):
        """Render developer analysis section."""
        st.subheader("Developer Analysis")
        col1, col2 = st.columns(2)
        with col1:
            # Developer Market Share
            dev_share = (
                df.groupby('Developer')['UnitCode'].count().sort_values(ascending=True)
            )
            fig_dev = px.bar(
                x=dev_share.values,
                y=dev_share.index,
                orientation='h',
                title='Developer Market Share',
                labels={'x': 'Number of Properties', 'y': 'Developer'},
                color_discrete_sequence=['#4a90e2'],
            )
            fig_dev.update_layout(plot_bgcolor='#f0f2f6', paper_bgcolor='#f0f2f6')
            st.plotly_chart(fig_dev, use_container_width=True)
        with col2:
            # Average Price by Developer
            avg_price_dev = (
                df.groupby('Developer')['PricePerSqft'].mean().sort_values(ascending=True)
            )
            fig_dev_price = px.bar(
                x=avg_price_dev.values,
                y=avg_price_dev.index,
                orientation='h',
                title='Average Price/Sqft by Developer',
                labels={'x': 'Average Price/Sqft (AED)', 'y': 'Developer'},
                color_discrete_sequence=['#2ecc71'],
            )
            fig_dev_price.update_layout(plot_bgcolor='#f0f2f6', paper_bgcolor='#f0f2f6')
            st.plotly_chart(fig_dev_price, use_container_width=True)

        # Developer Performance Metrics
        dev_metrics = df.groupby('Developer', observed=False).agg({
            'UnitCode': 'count',
            'AskingPrice': ['mean', 'min', 'max'],
            'TotalArea': 'mean',
            'PricePerSqft': ['mean', 'std'],
        }).round(2)
        dev_metrics.columns = [
            'Properties', 'Avg Price', 'Min Price', 'Max Price',
            'Avg Area', 'Avg Price/Sqft', 'Price/Sqft Std',
        ]
        st.dataframe(dev_metrics, use_container_width=True)

    def render_location_insights(self, df: pd.DataFrame):
        """Render location and view analysis section."""
        st.subheader("Location and View Analysis")
        col1, col2 = st.columns(2)
        with col1:
            # View Distribution
            view_dist = df['View'].value_counts()
            fig_view = px.pie(
                values=view_dist.values,
                names=view_dist.index,
                title='View Distribution',
                hole=0.4,
                color_discrete_sequence=px.colors.sequential.Blues_r,
            )
            fig_view.update_layout(plot_bgcolor='#f0f2f6', paper_bgcolor='#f0f2f6')
            st.plotly_chart(fig_view, use_container_width=True)
        with col2:
            # Price Premium by View
            avg_price_view = (
                df.groupby('View')['PricePerSqft'].mean().sort_values(ascending=True)
            )
            fig_view_price = px.bar(
                x=avg_price_view.values,
                y=avg_price_view.index,
                orientation='h',
                title='Average Price/Sqft by View',
                labels={'x': 'Average Price/Sqft (AED)', 'y': 'View'},
                color_discrete_sequence=['#4a90e2'],
            )
            fig_view_price.update_layout(plot_bgcolor='#f0f2f6', paper_bgcolor='#f0f2f6')
            st.plotly_chart(fig_view_price, use_container_width=True)

        # View Analysis Table
        view_analysis = df.groupby('View', observed=False).agg({
            'UnitCode': 'count',
            'AskingPrice': ['mean', 'min', 'max'],
            'PricePerSqft': ['mean', 'std'],
        }).round(2)
        view_analysis.columns = [
            'Count', 'Avg Price', 'Min Price', 'Max Price',
            'Avg Price/Sqft', 'Price/Sqft Std',
        ]
        st.dataframe(view_analysis, use_container_width=True)

    def render_market_trends(self, df: pd.DataFrame):
        """Render market trends analysis section."""
        st.subheader("Market Trends Analysis")
        # Convert IndexedDate to datetime and derive the month on a
        # standalone series, leaving the caller's frame unchanged.
        months = pd.to_datetime(df['IndexedDate']).dt.to_period('M').rename('Month')

        # Group by month and calculate average price
        monthly_trends = df.groupby(months).agg({
            'AskingPrice': 'mean',
            'UnitCode': 'count',
        }).reset_index()
        monthly_trends['Month'] = monthly_trends['Month'].dt.to_timestamp()

        # Plot price trends
        fig_trends = px.line(
            monthly_trends,
            x='Month',
            y='AskingPrice',
            title='Average Property Price Over Time',
            labels={'AskingPrice': 'Average Price (AED)', 'Month': 'Month'},
            color_discrete_sequence=['#4a90e2'],
        )
        fig_trends.update_layout(plot_bgcolor='#f0f2f6', paper_bgcolor='#f0f2f6')
        st.plotly_chart(fig_trends, use_container_width=True)

        # Plot property count trends
        fig_count = px.line(
            monthly_trends,
            x='Month',
            y='UnitCode',
            title='Number of Properties Listed Over Time',
            labels={'UnitCode': 'Number of Properties', 'Month': 'Month'},
            color_discrete_sequence=['#2ecc71'],
        )
        fig_count.update_layout(plot_bgcolor='#f0f2f6', paper_bgcolor='#f0f2f6')
        st.plotly_chart(fig_count, use_container_width=True)
def render_analytics_tab(search_system):
    """Entry point for the analytics tab.

    Wraps ``search_system`` in a ``RealEstateAnalytics`` instance and draws
    its dashboard; nothing is returned.
    """
    RealEstateAnalytics(search_system).render_analytics_dashboard()