# vartur-search-engine / src/analytics.py
# agoor97: Add application files (9e22989)
import logging

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
class RealEstateAnalytics:
    """Streamlit analytics dashboard over the properties held by a search system.

    The only part of ``search_system`` this class touches is
    ``search_system.collection.get(include=['metadatas'])``, which is expected
    to return a mapping whose ``'metadatas'`` entry is a list of per-property
    dicts (see ``_parse_property`` for the keys that are read).
    """

    # Price-segment labels, cheapest first.
    _PRICE_SEGMENTS = ['Entry', 'Mid-Low', 'Mid', 'Mid-High', 'Luxury']
    # Letter floor codes and the numeric level substituted for each of them.
    _FLOOR_CODES = {'G': '0', 'B': '-1', 'M': '1', 'P': '2'}
    # Background colour shared by the detail charts (those not styled through
    # ``enhance_figure``).
    _PANEL_BACKGROUND = '#f0f2f6'

    _logger = logging.getLogger(__name__)

    def __init__(self, search_system):
        """Store the search system and the colour palette used by the charts."""
        self.search_system = search_system
        self.colors = {
            'primary': ['#2E3B4E', '#405D8C', '#557ABC', '#7295D7', '#9FB5E6'],
            'accent': '#FF6B6B',
            'background': '#FFFFFF',
            'grid': '#F0F2F6'
        }

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------
    def enhance_figure(self, fig):
        """Apply consistent styling to plotly figures.

        Sets background, font, title and legend options plus the grid colour
        of both axes, then returns the same figure so calls can be chained.
        """
        fig.update_layout(
            plot_bgcolor=self.colors['background'],
            paper_bgcolor=self.colors['background'],
            font_family="Arial",
            title_font_size=20,
            title_x=0.5,
            legend_title_font_size=14,
            legend_font_size=12,
            showlegend=True
        )
        fig.update_xaxes(gridcolor=self.colors['grid'])
        fig.update_yaxes(gridcolor=self.colors['grid'])
        return fig

    def _show_panel_chart(self, fig) -> None:
        """Paint ``fig`` with the panel background and render it full width."""
        fig.update_layout(
            plot_bgcolor=self._PANEL_BACKGROUND,
            paper_bgcolor=self._PANEL_BACKGROUND,
        )
        st.plotly_chart(fig, use_container_width=True)

    @staticmethod
    def _parse_property(metadata) -> dict:
        """Convert one raw metadata mapping into a dashboard row.

        Numeric fields may carry thousands separators (``'1,500,000'``).
        Any conversion problem (missing key, unparseable number or date,
        non-string text field) propagates to the caller as an exception.
        """
        price = float(str(metadata['price']).replace(',', ''))
        area = float(str(metadata['total_area']).replace(',', ''))
        return {
            'UnitCode': str(metadata['unit_code']),
            'UnitType': metadata['unit_type'].title(),
            'Floor': str(metadata['floor']),
            'Developer': metadata['developer'].title(),
            'TotalArea': area,
            'AskingPrice': price,
            # Avoid dividing by a zero/negative area; such rows report 0.
            'PricePerSqft': round(price / area, 2) if area > 0 else 0,
            'View': metadata['view'].title(),
            'Source': metadata['source_file'],
            'IndexedDate': pd.to_datetime(metadata['indexed_date'])
        }

    @classmethod
    def _price_segments(cls, prices: pd.Series) -> pd.Series:
        """Split ``prices`` into up to five equally sized, ordered segments.

        Quantiles are taken over the *rank* of each price rather than the raw
        value: ranks are unique, so ``pd.qcut`` cannot fail with
        "Bin edges must be unique" when many listings share a price. For
        distinct prices the assignment is the same as binning the raw values.
        With fewer rows than labels only the first labels are used.
        """
        labels = cls._PRICE_SEGMENTS
        ranks = prices.rank(method='first')
        n_bins = min(len(labels), int(ranks.notna().sum()))
        if n_bins < 2:
            # Zero or one priced row: quantile binning is undefined, so every
            # priced row simply falls in the first segment.
            lone = pd.Series(labels[0], index=prices.index, dtype=object)
            lone = lone.where(prices.notna())
            return lone.astype(pd.CategoricalDtype(labels[:1], ordered=True))
        return pd.qcut(ranks, q=n_bins, labels=labels[:n_bins])

    @classmethod
    def _normalize_floors(cls, floors: pd.Series) -> pd.Series:
        """Return ``floors`` as numbers, translating the known letter codes.

        Codes are matched against the whole (trimmed, upper-cased) value, so
        a value such as ``'B2'`` is not partially rewritten into ``'-12'``.
        Anything that is neither a known code nor a number becomes NaN.
        """
        cleaned = floors.astype(str).str.strip().str.upper()
        translated = cleaned.map(lambda code: cls._FLOOR_CODES.get(code, code))
        return pd.to_numeric(translated, errors='coerce')

    @staticmethod
    def _monthly_summary(df: pd.DataFrame, count_column: str = 'AskingPrice') -> pd.DataFrame:
        """Aggregate listings per calendar month of ``IndexedDate``.

        Returns a frame with columns ``Month`` (monthly ``Period``),
        ``AvgPrice`` (mean ``AskingPrice``) and ``Volume`` (non-null count of
        ``count_column``), sorted by month. ``df`` itself is not modified.
        """
        months = pd.to_datetime(df['IndexedDate']).dt.to_period('M').rename('Month')
        return (
            df.groupby(months)
            .agg(AvgPrice=('AskingPrice', 'mean'), Volume=(count_column, 'count'))
            .reset_index()
        )

    # ------------------------------------------------------------------
    # Data access
    # ------------------------------------------------------------------
    def get_all_properties(self) -> pd.DataFrame:
        """Fetch all properties and convert to DataFrame.

        Records that cannot be parsed are skipped (and reported through the
        module logger) so one bad record does not hide the rest. If the fetch
        itself fails, the error is shown with ``st.error`` and an empty frame
        is returned.
        """
        try:
            # Get all documents instead of using search
            results = self.search_system.collection.get(include=['metadatas'])
            if not results or not results['metadatas']:
                return pd.DataFrame()

            rows = []
            skipped = 0
            for metadata in results['metadatas']:
                try:
                    rows.append(self._parse_property(metadata))
                except Exception as exc:
                    # Deliberately broad: ingestion is best-effort per record,
                    # but the loss is counted and logged instead of hidden.
                    skipped += 1
                    self._logger.debug("Skipping malformed property record: %r", exc)
            if skipped:
                self._logger.warning(
                    "Skipped %d of %d property records that could not be parsed",
                    skipped,
                    len(results['metadatas']),
                )
            return pd.DataFrame(rows)
        except Exception as e:
            st.error(f"Error fetching property data: {str(e)}")
            return pd.DataFrame()

    # ------------------------------------------------------------------
    # Dashboard
    # ------------------------------------------------------------------
    def render_analytics_dashboard(self):
        """Render the dashboard title and its four analysis tabs."""
        st.title("📊 Real Estate Analytics Hub")
        # Get and prepare data
        df = self.get_all_properties()
        if df.empty:
            st.warning("No data available for analysis")
            return
        # Create analysis tabs
        tabs = st.tabs([
            "💰 Price Analysis",
            "🌡️ Market Heatmap",
            "🎯 Portfolio Breakdown",
            "📈 Time Analysis"
        ])
        with tabs[0]:
            self.render_price_analysis(df)
        with tabs[1]:
            self.render_market_heatmap(df)
        with tabs[2]:
            self.render_property_sunburst(df)
        with tabs[3]:
            self.render_time_analysis(df)

    def render_summary_stats(self, df: pd.DataFrame):
        """Render summary statistics cards."""
        col1, col2, col3, col4 = st.columns(4)
        with col1:
            st.metric(
                "Total Properties",
                f"{len(df):,}",
                f"{df['UnitType'].nunique()} types"
            )
        with col2:
            total_value = df['AskingPrice'].sum()
            st.metric(
                "Total Value",
                f"{total_value/1e9:.2f}B AED",
                f"Avg: {df['AskingPrice'].mean()/1e6:.1f}M"
            )
        with col3:
            avg_price_sqft = df['PricePerSqft'].mean()
            spread = df['PricePerSqft'].std()
            if pd.isna(spread):
                # The sample std of a single row is NaN; show 0 rather than "±nan".
                spread = 0.0
            st.metric(
                "Avg Price/Sqft",
                f"{avg_price_sqft:,.0f} AED",
                f"±{spread:,.0f}"
            )
        with col4:
            st.metric(
                "Developers",
                f"{df['Developer'].nunique()}",
                f"{df['View'].nunique()} views"
            )

    def render_price_analysis(self, df: pd.DataFrame):
        """Render the price-segment violin plot and the price/sqft scatter."""
        st.subheader("Advanced Price Analysis")
        # Work on a derived frame so the caller's DataFrame (shared by the
        # other tabs) does not gain a PriceCategory column.
        segmented = df.assign(PriceCategory=self._price_segments(df['AskingPrice']))
        col1, col2 = st.columns(2)
        with col1:
            # Enhanced Price Distribution with Price Categories
            fig = px.violin(segmented,
                            x='PriceCategory',
                            y='AskingPrice',
                            box=True,
                            points="all",
                            color='PriceCategory',
                            title='Price Distribution by Segment',
                            # Keep segments in Entry -> Luxury order instead
                            # of order of first appearance in the data.
                            category_orders={'PriceCategory': self._PRICE_SEGMENTS},
                            color_discrete_sequence=self.colors['primary'])
            st.plotly_chart(self.enhance_figure(fig), use_container_width=True)
        with col2:
            # Price per Sqft vs Total Area Scatter
            fig = px.scatter(df,
                             x='TotalArea',
                             y='PricePerSqft',
                             color='UnitType',
                             size='AskingPrice',
                             hover_data=['Developer', 'View'],
                             title='Price/Sqft vs Area Analysis',
                             color_discrete_sequence=self.colors['primary'])
            st.plotly_chart(self.enhance_figure(fig), use_container_width=True)

    def render_market_heatmap(self, df: pd.DataFrame):
        """Render a heatmap of mean price/sqft by unit type (rows) and view (columns)."""
        st.subheader("Market Heatmap Analysis")
        # Create price heatmap by property type and view
        pivot_data = df.pivot_table(
            values='PricePerSqft',
            index='UnitType',
            columns='View',
            aggfunc='mean'
        ).round(0)
        fig = go.Figure(data=go.Heatmap(
            z=pivot_data.values,
            x=pivot_data.columns,
            y=pivot_data.index,
            colorscale=self.colors['primary'],
            text=pivot_data.values.round(0),
            texttemplate='%{text:,.0f}',
            textfont={'size': 10},
            hoverongaps=False
        ))
        fig.update_layout(
            title='Price/Sqft Heatmap by Property Type and View',
            xaxis_title='View',
            yaxis_title='Property Type'
        )
        st.plotly_chart(self.enhance_figure(fig), use_container_width=True)

    def render_property_sunburst(self, df: pd.DataFrame):
        """Render a Developer > UnitType > View sunburst sized by asking price."""
        st.subheader("Property Portfolio Breakdown")
        # Sector colours are averages weighted by AskingPrice; a sector whose
        # prices sum to zero cannot be averaged, so unpriced rows are left out.
        priced = df[df['AskingPrice'] > 0]
        if priced.empty:
            st.warning("No data available for analysis")
            return
        fig = px.sunburst(
            priced,
            path=['Developer', 'UnitType', 'View'],
            values='AskingPrice',
            color='PricePerSqft',
            color_continuous_scale=self.colors['primary'],
            title='Portfolio Hierarchy Analysis'
        )
        st.plotly_chart(self.enhance_figure(fig), use_container_width=True)

    def render_time_analysis(self, df: pd.DataFrame):
        """Render monthly average price (line) against listing volume (bars)."""
        st.subheader("Temporal Market Analysis")
        # Prepare time series data
        time_data = self._monthly_summary(df)
        # Period values are shown as plain 'YYYY-MM' strings on the x axis.
        time_data['Month'] = time_data['Month'].astype(str)
        # Create combined trend analysis
        fig = go.Figure()
        # Add price trend
        fig.add_trace(go.Scatter(
            x=time_data['Month'],
            y=time_data['AvgPrice'],
            name='Avg Price',
            line=dict(color=self.colors['primary'][0]),
            yaxis='y'
        ))
        # Add volume bars
        fig.add_trace(go.Bar(
            x=time_data['Month'],
            y=time_data['Volume'],
            name='Volume',
            marker_color=self.colors['accent'],
            opacity=0.3,
            yaxis='y2'
        ))
        # Update layout with dual axes
        fig.update_layout(
            title='Price Trends with Trading Volume',
            yaxis=dict(title='Average Price (AED)'),
            yaxis2=dict(title='Number of Properties', overlaying='y', side='right'),
            hovermode='x unified'
        )
        st.plotly_chart(self.enhance_figure(fig), use_container_width=True)

    # ------------------------------------------------------------------
    # Detail sections
    # ------------------------------------------------------------------
    def render_property_type_analysis(self, df: pd.DataFrame):
        """Render property type analysis section."""
        st.subheader("Property Type Analysis")
        col1, col2 = st.columns(2)
        with col1:
            # Property Type Distribution
            type_dist = df['UnitType'].value_counts()
            fig_types = px.pie(
                values=type_dist.values,
                names=type_dist.index,
                title='Property Type Distribution',
                hole=0.4,
                color_discrete_sequence=px.colors.sequential.Blues_r
            )
            self._show_panel_chart(fig_types)
        with col2:
            # Average Price by Type
            avg_price_type = df.groupby('UnitType')['AskingPrice'].mean().sort_values(ascending=True)
            fig_avg_price = px.bar(
                x=avg_price_type.values,
                y=avg_price_type.index,
                orientation='h',
                title='Average Price by Property Type',
                labels={'x': 'Average Price (AED)', 'y': 'Property Type'},
                color_discrete_sequence=['#4a90e2']
            )
            self._show_panel_chart(fig_avg_price)
        # Type Summary Table
        type_summary = df.groupby('UnitType').agg({
            'UnitCode': 'count',
            'AskingPrice': ['mean', 'min', 'max'],
            'TotalArea': 'mean',
            'PricePerSqft': 'mean'
        }).round(2)
        type_summary.columns = ['Count', 'Avg Price', 'Min Price', 'Max Price', 'Avg Area', 'Avg Price/Sqft']
        st.dataframe(type_summary, use_container_width=True)

    def render_floor_analysis(self, df: pd.DataFrame):
        """Render floor analysis section."""
        st.subheader("Floor Distribution Analysis")
        # Normalize floor values on a derived frame; the caller's DataFrame
        # is left untouched.
        floors = df.assign(NormalizedFloor=self._normalize_floors(df['Floor']))
        col1, col2 = st.columns(2)
        with col1:
            # Floor Distribution
            floor_dist = floors['NormalizedFloor'].value_counts().sort_index()
            fig_floor = px.bar(
                x=floor_dist.index,
                y=floor_dist.values,
                title='Floor Distribution',
                labels={'x': 'Floor', 'y': 'Count'},
                color_discrete_sequence=['#4a90e2']
            )
            self._show_panel_chart(fig_floor)
        with col2:
            # Price vs Floor
            # NOTE: trendline="ols" relies on the optional statsmodels package.
            fig_price_floor = px.scatter(
                floors,
                x='NormalizedFloor',
                y='AskingPrice',
                title='Price vs Floor Level',
                labels={'NormalizedFloor': 'Floor', 'AskingPrice': 'Price (AED)'},
                trendline="ols",
                color_discrete_sequence=['#2ecc71']
            )
            self._show_panel_chart(fig_price_floor)
        # Floor Level Premium Analysis
        st.subheader("Floor Level Premium Analysis")
        # Only rows with a recognised floor take part; counting unknown
        # floors as floor 0 would skew the lowest band.
        known = floors.dropna(subset=['NormalizedFloor'])
        if known['NormalizedFloor'].nunique() < 2:
            st.info("Not enough distinct floor levels for a premium breakdown")
            return
        # Many units share a floor, so quantile edges frequently coincide;
        # duplicates='drop' merges those bands instead of raising.
        bands = pd.qcut(known['NormalizedFloor'], q=5, duplicates='drop')
        floor_summary = known.groupby(bands, observed=False).agg({
            'UnitCode': 'count',
            'AskingPrice': 'mean',
            'PricePerSqft': 'mean'
        }).round(2)
        floor_summary.columns = ['Count', 'Avg Price', 'Avg Price/Sqft']
        st.dataframe(floor_summary, use_container_width=True)

    def render_developer_analysis(self, df: pd.DataFrame):
        """Render developer analysis section."""
        st.subheader("Developer Analysis")
        col1, col2 = st.columns(2)
        with col1:
            # Developer Market Share
            dev_share = df.groupby('Developer')['UnitCode'].count().sort_values(ascending=True)
            fig_dev = px.bar(
                x=dev_share.values,
                y=dev_share.index,
                orientation='h',
                title='Developer Market Share',
                labels={'x': 'Number of Properties', 'y': 'Developer'},
                color_discrete_sequence=['#4a90e2']
            )
            self._show_panel_chart(fig_dev)
        with col2:
            # Average Price by Developer
            avg_price_dev = df.groupby('Developer')['PricePerSqft'].mean().sort_values(ascending=True)
            fig_dev_price = px.bar(
                x=avg_price_dev.values,
                y=avg_price_dev.index,
                orientation='h',
                title='Average Price/Sqft by Developer',
                labels={'x': 'Average Price/Sqft (AED)', 'y': 'Developer'},
                color_discrete_sequence=['#2ecc71']
            )
            self._show_panel_chart(fig_dev_price)
        # Developer Performance Metrics
        dev_metrics = df.groupby('Developer', observed=False).agg({
            'UnitCode': 'count',
            'AskingPrice': ['mean', 'min', 'max'],
            'TotalArea': 'mean',
            'PricePerSqft': ['mean', 'std']
        }).round(2)
        dev_metrics.columns = ['Properties', 'Avg Price', 'Min Price', 'Max Price',
                               'Avg Area', 'Avg Price/Sqft', 'Price/Sqft Std']
        st.dataframe(dev_metrics, use_container_width=True)

    def render_location_insights(self, df: pd.DataFrame):
        """Render location and view analysis section."""
        st.subheader("Location and View Analysis")
        col1, col2 = st.columns(2)
        with col1:
            # View Distribution
            view_dist = df['View'].value_counts()
            fig_view = px.pie(
                values=view_dist.values,
                names=view_dist.index,
                title='View Distribution',
                hole=0.4,
                color_discrete_sequence=px.colors.sequential.Blues_r
            )
            self._show_panel_chart(fig_view)
        with col2:
            # Price Premium by View
            avg_price_view = df.groupby('View')['PricePerSqft'].mean().sort_values(ascending=True)
            fig_view_price = px.bar(
                x=avg_price_view.values,
                y=avg_price_view.index,
                orientation='h',
                title='Average Price/Sqft by View',
                labels={'x': 'Average Price/Sqft (AED)', 'y': 'View'},
                color_discrete_sequence=['#4a90e2']
            )
            self._show_panel_chart(fig_view_price)
        # View Analysis Table
        view_analysis = df.groupby('View', observed=False).agg({
            'UnitCode': 'count',
            'AskingPrice': ['mean', 'min', 'max'],
            'PricePerSqft': ['mean', 'std']
        }).round(2)
        view_analysis.columns = ['Count', 'Avg Price', 'Min Price', 'Max Price',
                                 'Avg Price/Sqft', 'Price/Sqft Std']
        st.dataframe(view_analysis, use_container_width=True)

    def render_market_trends(self, df: pd.DataFrame):
        """Render market trends analysis section."""
        st.subheader("Market Trends Analysis")
        # Group by month and calculate average price / listing count without
        # adding helper columns to the caller's DataFrame.
        monthly_trends = self._monthly_summary(df, count_column='UnitCode')
        # Line charts need real timestamps (start of each month) on the x axis.
        monthly_trends['Month'] = monthly_trends['Month'].dt.to_timestamp()
        # Plot price trends
        fig_trends = px.line(
            monthly_trends,
            x='Month',
            y='AvgPrice',
            title='Average Property Price Over Time',
            labels={'AvgPrice': 'Average Price (AED)', 'Month': 'Month'},
            color_discrete_sequence=['#4a90e2']
        )
        self._show_panel_chart(fig_trends)
        # Plot property count trends
        fig_count = px.line(
            monthly_trends,
            x='Month',
            y='Volume',
            title='Number of Properties Listed Over Time',
            labels={'Volume': 'Number of Properties', 'Month': 'Month'},
            color_discrete_sequence=['#2ecc71']
        )
        self._show_panel_chart(fig_count)
def render_analytics_tab(search_system):
    """Entry point for the analytics tab.

    Wraps ``search_system`` in a ``RealEstateAnalytics`` and immediately
    renders its dashboard; nothing is returned.
    """
    RealEstateAnalytics(search_system).render_analytics_dashboard()